diff --git a/.flake8 b/.flake8 index 6b80389b..96a1bcff 100644 --- a/.flake8 +++ b/.flake8 @@ -4,7 +4,7 @@ max-line-length = 160 max-complexity = 13 inline-quotes = " -application-import-names = dedoc, tests +application-import-names = dedoc, tests, scripts, train_dataset import-order-style = pycharm exclude = @@ -14,8 +14,6 @@ exclude = .github, *__init__.py, resources, - dedoc/scripts, - examples, venv, build, dedoc.egg-info @@ -24,3 +22,5 @@ exclude = # ANN101 - type annotations for self ignore = ANN101 +per-file-ignores = + scripts/*:T201 diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index a628b0b1..4cb468e8 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -32,3 +32,4 @@ jobs: cd docs/source/_static/code_examples python dedoc_usage_tutorial.py python dedoc_add_new_doc_type_tutorial.py + python dedoc_add_new_structure_type_tutorial.py diff --git a/.github/workflows/test_labeling.yaml b/.github/workflows/test_labeling.yaml new file mode 100644 index 00000000..be726ca0 --- /dev/null +++ b/.github/workflows/test_labeling.yaml @@ -0,0 +1,34 @@ +name: CI + +# Controls when the action will run. 
+on: + pull_request: + branches: + - develop + - master + paths-ignore: + - 'VERSION' + - 'docs/source/changelog.rst' + push: + branches: + - develop + - master + paths-ignore: + - 'VERSION' + - 'docs/source/changelog.rst' + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + labeling: + runs-on: ubuntu-latest + steps: + - name: Checkout repo + uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: '3.9' + - name: Run tests for labeling + run: | + test="true" docker-compose -f labeling/docker-compose.yml up --build --exit-code-from test diff --git a/.github/workflows/test_skip.yaml b/.github/workflows/test_skip.yaml new file mode 100644 index 00000000..44a5ef2d --- /dev/null +++ b/.github/workflows/test_skip.yaml @@ -0,0 +1,17 @@ +name: Skip CI + +on: + push: + branches: + - new_version + paths: + - 'VERSION' + - 'docs/source/changelog.rst' + workflow_dispatch: + +jobs: + pipeline: + runs-on: ubuntu-latest + steps: + - name: Skip tests (only VERSION and changelog have been changed) + run: echo "This is used to meet the requirements of pull-request to the develop branch (pipeline should pass)" diff --git a/.gitignore b/.gitignore index 16467ec7..b875bf22 100644 --- a/.gitignore +++ b/.gitignore @@ -96,7 +96,6 @@ ENV/ [Ll]ib [Ll]ib64 [Ll]ocal -[Ss]cripts pyvenv.cfg .venv pip-selfcheck.json diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 012ba03a..76ee04b4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ repos: rev: 5.0.4 hooks: - id: flake8 - exclude: \.github|.*__init__\.py|resources|dedoc/scripts|examples|docs|venv|build|dedoc\.egg-info + exclude: \.github|.*__init__\.py|resources|docs|venv|build|dedoc\.egg-info args: - "--config=.flake8" additional_dependencies: [ diff --git a/Dockerfile b/Dockerfile index 09632601..779508df 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,4 +20,4 @@ RUN 
echo "__version__ = \"$(cat /dedoc_root/VERSION)\"" > /dedoc_root/dedoc/vers ADD tests /dedoc_root/tests ADD resources /dedoc_root/resources -CMD ["python3", "/dedoc_root/dedoc/main.py", "-c", "/dedoc_root/dedoc/config.py"] +CMD ["python3", "/dedoc_root/dedoc/main.py"] diff --git a/VERSION b/VERSION index 415b19fc..42f7d233 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.0 \ No newline at end of file +2.1 \ No newline at end of file diff --git a/dedoc/api/api_utils.py b/dedoc/api/api_utils.py index df8a1286..1287912d 100644 --- a/dedoc/api/api_utils.py +++ b/dedoc/api/api_utils.py @@ -133,7 +133,7 @@ def json2html(text: str, paragraph: TreeNode, tables: Optional[List[Table]], tab if tables is not None and len(tables) > 0: text += "

Tables:

" for table in tables: - text += __table2html(table, table2id) + text += table2html(table, table2id) text += "

 

" return text @@ -201,7 +201,7 @@ def __annotations2html(paragraph: TreeNode, table2id: Dict[str, int]) -> str: return text.replace("\n", "
") -def __table2html(table: Table, table2id: Dict[str, int]) -> str: +def table2html(table: Table, table2id: Dict[str, int]) -> str: uid = table.metadata.uid text = f"

table {table2id[uid]}:

" text += f'\n\n' diff --git a/dedoc/api/web/index.html b/dedoc/api/web/index.html index 27b72fdb..6a947230 100644 --- a/dedoc/api/web/index.html +++ b/dedoc/api/web/index.html @@ -10,7 +10,9 @@ @@ -92,11 +94,13 @@

Attachments handling

-

Tables handling

+

Tables handling

need_pdf_table_analysis, orient_analysis_cells, orient_cell_angle

- +

@@ -188,9 +192,9 @@

Other formats handling


-
-
-
+
+
+
diff --git a/dedoc/api/web/train_dataset/info_labeling_mode.html b/dedoc/api/web/train_dataset/info_labeling_mode.html deleted file mode 100644 index 1fe3ddfb..00000000 --- a/dedoc/api/web/train_dataset/info_labeling_mode.html +++ /dev/null @@ -1,38 +0,0 @@ - - - - - Dedoc | prepare label data - - - - -
- -

Процесс создания датасетов и обучения классификаторов

- -

Шаг 1 - Формирование заданий для системы разметки

-
    -
  1. - Запустите dedoc в режиме разметки путем включения строки labeling_mode=True в ваш конфиг файл config.py. -
  2. -
  3. - Подготовьте данные для разметки в виде архива документов, которые необходимо обработать. -
  4. -
  5. - Для формирования заданий для внешней системы разметки идем сюда и загружаем подготовленный архив с необходимыми параметрами. -
  6. -
- -

Шаг 2 - Разметка данных

-

- Разметка подготовленных данных осуществляется с помощью внешней системы разметки -

- -

Шаг 3 - Удаление данных для разметки

-

- Тут можно удалить промежуточные данные, используемые при создании заданий на разметку. -

-
- - \ No newline at end of file diff --git a/dedoc/config.py b/dedoc/config.py index 10711e25..f3c374eb 100644 --- a/dedoc/config.py +++ b/dedoc/config.py @@ -1,8 +1,6 @@ -import importlib.util import logging import os import sys -from typing import Any, Optional logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(asctime)s - %(pathname)s - %(levelname)s - %(message)s") @@ -13,6 +11,7 @@ # -----------------------------------------RESOURCES PATH SETTINGS---------------------------------------------------- resources_path=RESOURCES_PATH, intermediate_data_path=os.path.join(RESOURCES_PATH, "datasets"), + table_path="/tmp/tables", # -----------------------------------------COMMON DEBUG SETTINGS---------------------------------------------------- debug_mode=DEBUG_MODE, @@ -66,20 +65,11 @@ def get_instance(cls: "Configuration") -> "Configuration": return cls.__instance - def __init_config(self, args: Optional[Any] = None) -> None: - if args is not None and args.config_path is not None: - spec = importlib.util.spec_from_file_location("config_module", args.config_path) - config_module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(config_module) - self.__config = config_module._config - else: + def get_config(self) -> dict: + if self.__config is None: self.__config = _config - - def get_config(self, args: Optional[Any] = None) -> dict: - if self.__config is None or args is not None: - self.__init_config(args) return self.__config -def get_config(args: Optional[Any] = None) -> dict: - return Configuration.get_instance().get_config(args) +def get_config() -> dict: + return Configuration.get_instance().get_config() diff --git a/dedoc/data_structures/hierarchy_level.py b/dedoc/data_structures/hierarchy_level.py index 06b74b50..9df3304c 100644 --- a/dedoc/data_structures/hierarchy_level.py +++ b/dedoc/data_structures/hierarchy_level.py @@ -1,6 +1,8 @@ from functools import total_ordering from typing import Optional +import numpy as np + 
@total_ordering class HierarchyLevel: @@ -12,7 +14,9 @@ class HierarchyLevel: - level_1 defines primary importance (e.g. root - level_1=0, header - level_1=1, etc.); - level_2 defines the level inside lines of equal type (e.g. for list items - "1." - level_2=1, "1.1." - level_2=2, etc.). - For the least important lines like raw_text both levels are None. + For the least important lines (line_type=raw_text) both levels are None. + + Look to the :ref:`hierarchy level description ` to get more details. """ root = "root" toc = "toc" @@ -46,43 +50,47 @@ def __is_defined(self, other: "HierarchyLevel") -> bool: def __eq__(self, other: "HierarchyLevel") -> bool: """ Defines the equality of two hierarchy levels: - - two raw text lines or lines with unknown type are equal; - two lines with equal level_1, level_2 are equal. + - if some of the levels is None, its value is considered as +inf (infinities have equal value) + + :param other: other hierarchy level + :return: whether current hierarchy level == other hierarchy level """ if not isinstance(other, HierarchyLevel): return False - if self.__is_defined(other) and (self.level_1, self.level_2) == (other.level_1, other.level_2): - return True - if self.line_type == HierarchyLevel.raw_text and other.line_type == HierarchyLevel.raw_text: - return True - if self.line_type == HierarchyLevel.unknown and other.line_type == HierarchyLevel.unknown: - return True - return False + level_1, level_2 = self.__to_number(self.level_1), self.__to_number(self.level_2) + other_level_1, other_level_2 = self.__to_number(other.level_1), self.__to_number(other.level_2) + return (level_1, level_2) == (other_level_1, other_level_2) def __lt__(self, other: "HierarchyLevel") -> bool: """ Defines the comparison of hierarchy levels: - - line1 < line2 if (level_1, level_2) of line1 <= (level_1, level_2) of line2; - - line1 < line2 if line2 is raw text or unknown, and line1 has another type. 
+ - current level < other level if (level_1, level_2) < other (level_1, level_2); + - if some of the levels is None, its value is considered as +inf (infinities have equal value) - Else line1 >= line2. - - :param other: hierarchy level of the line2 + :param other: other hierarchy level + :return: whether current hierarchy level < other hierarchy level """ + # all not None if self.__is_defined(other): return (self.level_1, self.level_2) < (other.level_1, other.level_2) + + # all None if self.level_1 is None and self.level_2 is None and other.level_1 is None and other.level_2 is None: return False - if (self.level_1 is None or self.level_2 is None) and (other.level_1 is not None or other.level_2 is not None): - return False - if (self.level_1 is not None or self.level_2 is not None) and (other.level_1 is None or other.level_2 is None): - return True - return (self.level_1, self.level_2) < (other.level_1, other.level_2) + + level_1, level_2 = self.__to_number(self.level_1), self.__to_number(self.level_2) + other_level_1, other_level_2 = self.__to_number(other.level_1), self.__to_number(other.level_2) + + return (level_1, level_2) < (other_level_1, other_level_2) def __str__(self) -> str: return f"HierarchyLevel(level_1={self.level_1}, level_2={self.level_2}, can_be_multiline={self.can_be_multiline}, line_type={self.line_type})" + def __to_number(self, x: Optional[int]) -> int: + return np.inf if x is None else x + def is_raw_text(self) -> bool: """ Check if the line is raw text. 
diff --git a/dedoc/dedoc_manager.py b/dedoc/dedoc_manager.py index 64d96306..301498d4 100644 --- a/dedoc/dedoc_manager.py +++ b/dedoc/dedoc_manager.py @@ -10,7 +10,7 @@ from dedoc.data_structures import ParsedDocument, UnstructuredDocument from dedoc.manager_config import get_manager_config from dedoc.metadata_extractors import BaseMetadataExtractor -from dedoc.train_dataset.train_dataset_utils import get_path_original_documents, save_line_with_meta +from dedoc.utils.train_dataset_utils import get_path_original_documents, save_line_with_meta from dedoc.utils.utils import get_unique_name @@ -114,7 +114,7 @@ def __parse_no_error_handling(self, file_path: str, parameters: Dict[str, str]) self.logger.info(f"Extract structure from file {file_name}") if self.config.get("labeling_mode", False): - self.__save(os.path.join(tmp_dir, unique_filename), unstructured_document) + self.__save(converted_file_path, unstructured_document) # Step 5 - Form the output structure parsed_document = self.structure_constructor.construct(document=unstructured_document, parameters=parameters) @@ -141,5 +141,6 @@ def __init_parameters(self, file_path: str, parameters: Optional[dict]) -> dict: return result_parameters def __save(self, file_path: str, classified_document: UnstructuredDocument) -> None: + self.logger.info(f'Save document lines to {self.config["intermediate_data_path"]}') save_line_with_meta(lines=classified_document.lines, config=self.config, original_document=os.path.basename(file_path)) shutil.copy(file_path, os.path.join(get_path_original_documents(self.config), os.path.basename(file_path))) diff --git a/dedoc/main.py b/dedoc/main.py index 30b7f1f1..9c97125c 100644 --- a/dedoc/main.py +++ b/dedoc/main.py @@ -1,26 +1,7 @@ -import argparse - from dedoc.api.dedoc_api import get_api, run_api # noqa -from dedoc.config import Configuration, get_config - - -def main() -> None: - run_api(get_api()) +from dedoc.config import Configuration if __name__ == "__main__": - parser_config = 
argparse.ArgumentParser() - parser_config.add_argument("-c", "--config_path", help="path to configuration file") - parser_config.add_argument("-m", "--module", help="Only for tests") - parser_config.add_argument("-f", "--test_files", metavar="VALUE", nargs="*", help="Only for tests") - parser_config.add_argument("-v", "--unitest_verbose_mode", nargs="?", help="to enable verbose mode of unittest. Only for tests") - - args_config = parser_config.parse_args() - Configuration.get_instance().get_config(args_config) - config = get_config() - - if config.get("labeling_mode", False): - from api.train_dataset.train_dataset_api import run_special_api # noqa - run_special_api() - else: - main() + Configuration.get_instance().get_config() + run_api(get_api()) diff --git a/dedoc/readers/html_reader/html_reader.py b/dedoc/readers/html_reader/html_reader.py index 0e4e0a45..d398fbe9 100644 --- a/dedoc/readers/html_reader/html_reader.py +++ b/dedoc/readers/html_reader/html_reader.py @@ -57,7 +57,7 @@ def read(self, file_path: str, parameters: Optional[dict] = None) -> Unstructure document_postprocess = self.postprocessor.postprocess(document) return document_postprocess - def __handle_block(self, tag: Union[Tag], uid: str, handle_invisible_table: bool) -> List[LineWithMeta]: + def __handle_block(self, tag: Union[Tag], uid: str, handle_invisible_table: bool, table: Optional[bool] = False) -> List[LineWithMeta]: tag_uid = hashlib.md5((uid + str(tag.name)).encode()).hexdigest() assert isinstance(tag, (Tag, str)) if not self.__is_content_tag(tag, handle_invisible_table=handle_invisible_table): @@ -79,14 +79,14 @@ def __handle_block(self, tag: Union[Tag], uid: str, handle_invisible_table: bool elif tag.name in HtmlTags.list_tags: block_lines = self.__read_list(lst=tag, uid=tag_uid, path_hash=uid, handle_invisible_table=handle_invisible_table) else: - block_lines = self.__handle_single_tag(tag, uid) + block_lines = self.__handle_single_tag(tag, uid, table) for line in block_lines: if not 
getattr(line.metadata, "html_tag", None): line.metadata.extend_other_fields({"html_tag": tag.name}) return block_lines - def __handle_single_tag(self, tag: Tag, uid: str) -> List[LineWithMeta]: - text = self.__get_text(tag) + def __handle_single_tag(self, tag: Tag, uid: str, table: Optional[bool] = False) -> List[LineWithMeta]: + text = self.__get_text(tag, table) if not text or text.isspace(): return [] @@ -99,7 +99,7 @@ def __handle_single_tag(self, tag: Tag, uid: str) -> List[LineWithMeta]: line.metadata.extend_other_fields({"html_tag": tag.name}) return [line] - def __read_blocks(self, block: Tag, path_hash: str = "", handle_invisible_table: bool = False) -> List[LineWithMeta]: + def __read_blocks(self, block: Tag, path_hash: str = "", handle_invisible_table: bool = False, table: Optional[bool] = False) -> List[LineWithMeta]: uid = hashlib.md5((path_hash + str(block.name)).encode()).hexdigest() if not self.__is_content_tag(block, handle_invisible_table=handle_invisible_table): return [] @@ -108,7 +108,7 @@ def __read_blocks(self, block: Tag, path_hash: str = "", handle_invisible_table: for tag in block: assert isinstance(tag, (Tag, str)) - block_lines = self.__handle_block(tag=tag, uid=uid, handle_invisible_table=handle_invisible_table) + block_lines = self.__handle_block(tag=tag, uid=uid, handle_invisible_table=handle_invisible_table, table=table) lines.extend(block_lines) return lines @@ -182,8 +182,10 @@ def __handle_list_item(self, item: Tag, item_index: int, list_type: str, path_ha return lines # not currently used, but may be useful in the future - def __get_text(self, tag: Tag) -> [str, int, int]: - text = tag.getText() + "\n" if tag.name == "p" else tag.getText() + def __get_text(self, tag: Tag, table: Optional[bool] = False) -> [str, int, int]: + for br in tag.find_all("br"): + br.replace_with("\n") + text = tag.getText() + "\n" if tag.name == "p" and not table else tag.getText() text = "" if text is None else text return text @@ -218,11 +220,8 @@ def 
_read_table(self, table: Tag, path_hash: str) -> Table: for row in table.find_all(HtmlTags.table_rows): row_lines = [] for cell in row.find_all(HtmlTags.table_cells): - uid = hashlib.md5(cell.name.encode()).hexdigest() - tag_uid = hashlib.md5((uid + cell.getText()).encode()).hexdigest() - cell_with_meta = CellWithMeta( - lines=[self.__make_line(line=cell.getText(), line_type=HierarchyLevel.unknown, uid=tag_uid, path_hash=path_hash)], + lines=self.__read_blocks(block=cell, path_hash=path_hash, handle_invisible_table=False, table=True), # read each cell as block with styles colspan=cell.colspan if cell.colspan else 1, rowspan=cell.rowspan if cell.rowspan else 1, invisible=cell.invisible if cell.invisible else True diff --git a/dedoc/readers/html_reader/html_tags.py b/dedoc/readers/html_reader/html_tags.py index 2c90681c..5e8f7f44 100644 --- a/dedoc/readers/html_reader/html_tags.py +++ b/dedoc/readers/html_reader/html_tags.py @@ -2,7 +2,7 @@ class HtmlTags: service_tags = ["script", "style"] list_items = ["li", "dd", "dt"] - block_tags = ["aside", "article", "body", "div", "footer", "header", "html", "main", "nav", "section", "form", *list_items] + block_tags = ["aside", "article", "body", "div", "blockquote", "footer", "header", "html", "main", "nav", "section", "form", *list_items] unordered_list = ["ul", "dl", "dir"] ordered_list = ["ol"] list_tags = unordered_list + ordered_list @@ -20,7 +20,7 @@ class HtmlTags: styled_tag = bold_tags + italic_tags + underlined_tags + strike_tags + superscript_tags + subscript_tags simple_text_tags = [ - "a", "abbr", "acronym", "applet", "area", "article", "aside", "bdi", "bdo", "big", "blockquote", "canvas", "caption", "center", "cite", "code", "data", + "a", "abbr", "acronym", "applet", "area", "article", "aside", "bdi", "bdo", "big", "canvas", "caption", "center", "cite", "code", "data", "font", "kbd", "mark", "output", "p", "pre", "q", "samp", "small", "span", "tt", "wbr" ] text_tags = simple_text_tags + styled_tag diff --git 
a/dedoc/readers/pdf_reader/data_classes/tables/scantable.py b/dedoc/readers/pdf_reader/data_classes/tables/scantable.py index c7c47fe1..4bc057df 100644 --- a/dedoc/readers/pdf_reader/data_classes/tables/scantable.py +++ b/dedoc/readers/pdf_reader/data_classes/tables/scantable.py @@ -5,6 +5,7 @@ import numpy as np from dedocutils.data_structures import BBox +from dedoc.data_structures import CellWithMeta, Table, TableMetadata from dedoc.readers.pdf_reader.data_classes.tables.cell import Cell from dedoc.readers.pdf_reader.data_classes.tables.location import Location @@ -27,6 +28,11 @@ def extended(self, table: "ScanTable") -> None: # extend order self.order = max(self.order, table.order) + def to_table(self) -> Table: + metadata = TableMetadata(page_id=self.page_number, uid=self.name, rotated_angle=self.location.rotated_angle) + cells_with_meta = [[CellWithMeta.create_from_cell(cell) for cell in row] for row in self.matrix_cells] + return Table(metadata=metadata, cells=cells_with_meta) + @staticmethod def get_cells_text(attr_cells: List[List[Cell]]) -> List[List[str]]: attrs = [] diff --git a/dedoc/readers/pdf_reader/data_classes/tables/table_tree.py b/dedoc/readers/pdf_reader/data_classes/tables/table_tree.py index 5516bd71..a212bc80 100644 --- a/dedoc/readers/pdf_reader/data_classes/tables/table_tree.py +++ b/dedoc/readers/pdf_reader/data_classes/tables/table_tree.py @@ -10,8 +10,6 @@ from dedoc.readers.pdf_reader.pdf_image_reader.ocr.ocr_cell_extractor import OCRCellExtractor from dedoc.utils.image_utils import crop_image_text -logger = logging.getLogger("TableRecognizer.TableTree") - """-------------------------------Таблица в виде дерева, полученная от OpenCV----------------------------------------""" ContourCell = namedtuple("ContourCell", ["id_con", "image"]) diff --git a/dedoc/readers/pdf_reader/data_classes/text_with_bbox.py b/dedoc/readers/pdf_reader/data_classes/text_with_bbox.py index e5a9b959..8ac0d201 100644 --- 
a/dedoc/readers/pdf_reader/data_classes/text_with_bbox.py +++ b/dedoc/readers/pdf_reader/data_classes/text_with_bbox.py @@ -5,6 +5,7 @@ from dedocutils.data_structures import BBox from dedoc.data_structures.annotation import Annotation +from dedoc.readers.pdf_reader.data_classes.word_with_bbox import WordWithBBox class TextWithBBox: @@ -12,19 +13,23 @@ class TextWithBBox: def __init__(self, bbox: BBox, page_num: int, - text: str, line_num: int, + words: List[WordWithBBox], uid: Optional[str] = None, label: Optional[str] = None, annotations: List[Annotation] = None) -> None: self.bbox = bbox self.page_num = page_num self.line_num = line_num - self.text = text + self.words = words self.label = label self.annotations = [] if annotations is None else annotations self.uid = f"bbox_{uuid1()}" if uid is None else uid + @property + def text(self) -> str: + return " ".join(word.text for word in self.words if word.text != "") + "\n" + def __str__(self) -> str: return f"TextWithBBox(bbox = {self.bbox}, page = {self.page_num}, text = {self.text})" @@ -36,6 +41,7 @@ def to_dict(self) -> dict: res["uid"] = self.uid res["_uid"] = self.uid res["bbox"] = self.bbox.to_dict() + res["words"] = [word.to_dict() for word in self.words] res["page_num"] = self.page_num res["line_num"] = self.line_num res["text"] = self.text diff --git a/dedoc/readers/pdf_reader/data_classes/word_with_bbox.py b/dedoc/readers/pdf_reader/data_classes/word_with_bbox.py new file mode 100644 index 00000000..cb9dc6c7 --- /dev/null +++ b/dedoc/readers/pdf_reader/data_classes/word_with_bbox.py @@ -0,0 +1,22 @@ +from collections import OrderedDict + +from dedocutils.data_structures import BBox + + +class WordWithBBox: + + def __init__(self, bbox: BBox, text: str) -> None: + self.bbox = bbox + self.text = text + + def __str__(self) -> str: + return f"WordWithBBox(bbox = {self.bbox}, text = {self.text})" + + def __repr__(self) -> str: + return self.__str__() + + def to_dict(self) -> dict: + res = OrderedDict() + 
res["bbox"] = self.bbox.to_dict() + res["text"] = self.text + return res diff --git a/dedoc/readers/pdf_reader/pdf_auto_reader/pdf_auto_reader.py b/dedoc/readers/pdf_reader/pdf_auto_reader/pdf_auto_reader.py index c91cc779..e2c3ad37 100644 --- a/dedoc/readers/pdf_reader/pdf_auto_reader/pdf_auto_reader.py +++ b/dedoc/readers/pdf_reader/pdf_auto_reader/pdf_auto_reader.py @@ -36,7 +36,7 @@ def __init__(self, *, config: Optional[dict] = None) -> None: self.pdf_txtlayer_reader = PdfTxtlayerReader(config=self.config) self.pdf_tabby_reader = PdfTabbyReader(config=self.config) self.pdf_image_reader = PdfImageReader(config=self.config) - self.txtlayer_detector = TxtLayerDetector(pdf_txtlayer_reader=self.pdf_txtlayer_reader, pdf_tabby_reader=self.pdf_tabby_reader, config=self.config) + self.txtlayer_detector = TxtLayerDetector(pdf_reader=self.pdf_tabby_reader, config=self.config) def can_read(self, file_path: Optional[str] = None, mime: Optional[str] = None, extension: Optional[str] = None, parameters: Optional[dict] = None) -> bool: """ diff --git a/dedoc/readers/pdf_reader/pdf_auto_reader/txtlayer_detector.py b/dedoc/readers/pdf_reader/pdf_auto_reader/txtlayer_detector.py index 5d3ac9a9..cfff918d 100644 --- a/dedoc/readers/pdf_reader/pdf_auto_reader/txtlayer_detector.py +++ b/dedoc/readers/pdf_reader/pdf_auto_reader/txtlayer_detector.py @@ -6,21 +6,18 @@ from dedoc.data_structures import LineWithMeta from dedoc.readers.pdf_reader.pdf_auto_reader.txtlayer_classifier import TxtlayerClassifier from dedoc.readers.pdf_reader.pdf_txtlayer_reader.pdf_tabby_reader import PdfTabbyReader -from dedoc.readers.pdf_reader.pdf_txtlayer_reader.pdf_txtlayer_reader import PdfTxtlayerReader -from dedoc.utils.pdf_utils import get_pdf_page_count PdfTxtlayerParameters = namedtuple("PdfTxtlayerParameters", ["is_correct_text_layer", "is_first_page_correct"]) class TxtLayerDetector: - def __init__(self, pdf_txtlayer_reader: PdfTxtlayerReader, pdf_tabby_reader: PdfTabbyReader, *, config: dict) -> 
None: + def __init__(self, pdf_reader: PdfTabbyReader, *, config: dict) -> None: self.config = config self.logger = config.get("logger", logging.getLogger()) self.txtlayer_classifier = TxtlayerClassifier(config=config) - self.pdf_txtlayer_reader = pdf_txtlayer_reader - self.pdf_tabby_reader = pdf_tabby_reader + self.pdf_reader = pdf_reader def detect_txtlayer(self, path: str, parameters: dict) -> PdfTxtlayerParameters: """ @@ -44,15 +41,7 @@ def __get_lines_for_predict(self, path: str, parameters: dict) -> List[LineWithM parameters_copy = deepcopy(parameters) parameters_copy["pages"] = "1:8" # two batches for pdf_txtlayer_reader parameters_copy["need_pdf_table_analysis"] = "false" - num_pages = get_pdf_page_count(path) - if num_pages is None or num_pages >= 50: - # TODO remove this when TLDR-518 is done - document = self.pdf_txtlayer_reader.read(path, parameters=parameters_copy) - else: - # tabby reader reads the whole document regardless "pages" parameter - # still it's faster to use tabby for documents with <= 50 pages - document = self.pdf_tabby_reader.read(path, parameters=parameters_copy) - + document = self.pdf_reader.read(path, parameters=parameters_copy) return document.lines def __is_first_page_correct(self, lines: List[LineWithMeta], is_txt_layer_correct: bool) -> bool: diff --git a/dedoc/readers/pdf_reader/pdf_base_reader.py b/dedoc/readers/pdf_reader/pdf_base_reader.py index d52e0d3c..fd6ed93b 100644 --- a/dedoc/readers/pdf_reader/pdf_base_reader.py +++ b/dedoc/readers/pdf_reader/pdf_base_reader.py @@ -13,10 +13,7 @@ import dedoc.utils.parameter_utils as param_utils from dedoc.attachments_extractors.concrete_attachments_extractors.pdf_attachments_extractor import PDFAttachmentsExtractor from dedoc.common.exceptions.bad_file_error import BadFileFormatError -from dedoc.data_structures.cell_with_meta import CellWithMeta from dedoc.data_structures.line_with_meta import LineWithMeta -from dedoc.data_structures.table import Table -from 
dedoc.data_structures.table_metadata import TableMetadata from dedoc.data_structures.unstructured_document import UnstructuredDocument from dedoc.extensions import recognized_extensions, recognized_mimes from dedoc.readers.base_reader import BaseReader @@ -92,12 +89,7 @@ def read(self, file_path: str, parameters: Optional[dict] = None) -> Unstructure ) lines, scan_tables, attachments, warnings, other_fields = self._parse_document(file_path, params_for_parse) - tables = [] - for scan_table in scan_tables: - metadata = TableMetadata(page_id=scan_table.page_number, uid=scan_table.name, rotated_angle=scan_table.location.rotated_angle) - cells_with_meta = [[CellWithMeta.create_from_cell(cell) for cell in row] for row in scan_table.matrix_cells] - table = Table(metadata=metadata, cells=cells_with_meta) - tables.append(table) + tables = [scan_table.to_table() for scan_table in scan_tables] if self._can_contain_attachements(file_path) and self.attachment_extractor.with_attachments(parameters): attachments += self.attachment_extractor.extract(file_path=file_path, parameters=parameters) diff --git a/dedoc/readers/pdf_reader/pdf_image_reader/line_metadata_extractor/font_type_classifier.py b/dedoc/readers/pdf_reader/pdf_image_reader/line_metadata_extractor/font_type_classifier.py index bcabdbc9..380c47f0 100644 --- a/dedoc/readers/pdf_reader/pdf_image_reader/line_metadata_extractor/font_type_classifier.py +++ b/dedoc/readers/pdf_reader/pdf_image_reader/line_metadata_extractor/font_type_classifier.py @@ -12,11 +12,19 @@ def predict_annotations(self, page: PageWithBBox) -> PageWithBBox: if len(page.bboxes) == 0: return page - bboxes = [bbox.bbox for bbox in page.bboxes] + bboxes = [word.bbox for line in page.bboxes for word in line.words] bold_probabilities = self.bold_classifier.classify(page.image, bboxes) - for bbox, bold_probability in zip(page.bboxes, bold_probabilities): - if bold_probability > 0.5: - bbox.annotations.append(BoldAnnotation(start=0, end=len(bbox.text), 
value="True")) + bbox_id = 0 + for line in page.bboxes: + current_text_len = 0 + + for word in line.words: + current_text_len = current_text_len + 1 if current_text_len > 0 else current_text_len # add len of " " (space between words) + extended_text_len = current_text_len + len(word.text) + if bold_probabilities[bbox_id] > 0.5: + line.annotations.append(BoldAnnotation(start=current_text_len, end=extended_text_len, value="True")) + current_text_len = extended_text_len + bbox_id += 1 return page diff --git a/dedoc/readers/pdf_reader/pdf_image_reader/ocr/ocr_line_extractor.py b/dedoc/readers/pdf_reader/pdf_image_reader/ocr/ocr_line_extractor.py index 64bc2c93..bd4c8c43 100644 --- a/dedoc/readers/pdf_reader/pdf_image_reader/ocr/ocr_line_extractor.py +++ b/dedoc/readers/pdf_reader/pdf_image_reader/ocr/ocr_line_extractor.py @@ -4,6 +4,7 @@ from dedoc.readers.pdf_reader.data_classes.page_with_bboxes import PageWithBBox from dedoc.readers.pdf_reader.data_classes.text_with_bbox import TextWithBBox +from dedoc.readers.pdf_reader.data_classes.word_with_bbox import WordWithBBox from dedoc.readers.pdf_reader.pdf_image_reader.ocr.ocr_utils import get_text_with_bbox_from_document_page, get_text_with_bbox_from_document_page_one_column @@ -30,12 +31,14 @@ def __split_image2bboxes(self, image: np.ndarray, page_num: int, language: str, height, width = image.shape[:2] extract_line_bbox = self.config.get("labeling_mode", False) - line_boxes = [ - TextWithBBox(text=line.text, page_num=page_num, bbox=line.bbox, line_num=line_num, - annotations=line.get_annotations(width, height, extract_line_bbox)) for line_num, line in enumerate(output_dict.lines) - ] + lines_with_bbox = [] + for line_num, line in enumerate(output_dict.lines): + words = [WordWithBBox(text=word.text, bbox=word.bbox) for word in line.words] + annotations = line.get_annotations(width, height, extract_line_bbox) + line_with_bbox = TextWithBBox(words=words, page_num=page_num, bbox=line.bbox, line_num=line_num, 
annotations=annotations) + lines_with_bbox.append(line_with_bbox) - return line_boxes + return lines_with_bbox def _filtered_bboxes(self, bboxes: List[TextWithBBox]) -> Iterable[TextWithBBox]: for text_with_bbox in bboxes: diff --git a/dedoc/readers/pdf_reader/pdf_image_reader/ocr/ocr_page/ocr_line.py b/dedoc/readers/pdf_reader/pdf_image_reader/ocr/ocr_page/ocr_line.py index be72d182..851fd6c2 100644 --- a/dedoc/readers/pdf_reader/pdf_image_reader/ocr/ocr_page/ocr_line.py +++ b/dedoc/readers/pdf_reader/pdf_image_reader/ocr/ocr_page/ocr_line.py @@ -24,6 +24,9 @@ def text(self) -> str: return " ".join(word.text for word in self.words if word.text != "") + "\n" def get_annotations(self, page_width: int, page_height: int, extract_line_bbox: bool) -> List[Annotation]: + if extract_line_bbox: + return [BBoxAnnotation(0, len(" ".join([w.text for w in self.words])), self.bbox, page_width, page_height)] + start = 0 annotations = [] @@ -35,8 +38,7 @@ def get_annotations(self, page_width: int, page_height: int, extract_line_bbox: annotations.append(ConfidenceAnnotation(start, end, str(word.confidence / 100))) annotations.append(BBoxAnnotation(start, end, word.bbox, page_width, page_height)) start += len(word.text) + 1 - if extract_line_bbox: - annotations.append(BBoxAnnotation(0, start, self.bbox, page_width, page_height)) + return annotations @staticmethod diff --git a/dedoc/readers/pdf_reader/pdf_image_reader/pdf_image_reader.py b/dedoc/readers/pdf_reader/pdf_image_reader/pdf_image_reader.py index 2daede1b..fd2cf6ff 100644 --- a/dedoc/readers/pdf_reader/pdf_image_reader/pdf_image_reader.py +++ b/dedoc/readers/pdf_reader/pdf_image_reader/pdf_image_reader.py @@ -14,7 +14,6 @@ from dedoc.readers.pdf_reader.pdf_base_reader import ParametersForParseDoc, PdfBaseReader from dedoc.readers.pdf_reader.pdf_image_reader.columns_orientation_classifier.columns_orientation_classifier import ColumnsOrientationClassifier from dedoc.readers.pdf_reader.pdf_image_reader.ocr.ocr_line_extractor 
import OCRLineExtractor -from dedoc.train_dataset.train_dataset_utils import save_page_with_bbox from dedoc.utils import supported_image_types from dedoc.utils.parameter_utils import get_path_param from dedoc.utils.utils import get_mime_extension @@ -94,9 +93,6 @@ def _process_one_page(self, page = self.ocr.split_image2lines(image=clean_image, language=parameters.language, is_one_column_document=is_one_column_document, page_num=page_number) lines = self.metadata_extractor.extract_metadata_and_set_annotations(page_with_lines=page) - if self.config.get("labeling_mode"): - save_page_with_bbox(page=page, config=self.config, document_name=os.path.basename(path)) - return lines, tables, page.attachments, [angle] def _detect_column_count_and_orientation(self, image: np.ndarray, parameters: ParametersForParseDoc) -> Tuple[np.ndarray, bool, float]: diff --git a/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_recognizer.py b/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_recognizer.py index 4a61530a..c1124ca4 100644 --- a/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_recognizer.py +++ b/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_recognizer.py @@ -1,6 +1,7 @@ import json import logging import os +import time from typing import List, Optional, Tuple import cv2 @@ -13,7 +14,6 @@ from dedoc.readers.pdf_reader.data_classes.tables.table_type import TableTypeAdditionalOptions from dedoc.readers.pdf_reader.pdf_image_reader.table_recognizer.table_extractors.concrete_extractors.multipage_table_extractor import MultiPageTableExtractor from dedoc.readers.pdf_reader.pdf_image_reader.table_recognizer.table_extractors.concrete_extractors.onepage_table_extractor import OnePageTableExtractor -from dedoc.train_dataset.data_path_config import table_path as save_path """-------------------------------------entry class of Table Recognizer Module---------------------------------------""" @@ -22,7 +22,7 @@ class 
TableRecognizer(object): def __init__(self, *, config: dict = None) -> None: - self.logger = config.get("logger", logging.getLogger(__name__)) + self.logger = config.get("logger", logging.getLogger()) self.onepage_tables_extractor = OnePageTableExtractor(config=config, logger=self.logger) self.multipage_tables_extractor = MultiPageTableExtractor(config=config, logger=self.logger) @@ -73,7 +73,7 @@ def __rec_tables_from_img(self, orient_cell_angle=orient_cell_angle, table_type=table_type) if self.config.get("labeling_mode", False): - self.__save_tables(tables=single_page_tables, image=src_image, table_path=save_path) + self.__save_tables(tables=single_page_tables, image=src_image, table_path=self.config.get("table_path", "/tmp/tables")) if self.table_type.detect_one_cell_table in table_type: filtered_tables = single_page_tables else: @@ -142,9 +142,9 @@ def __save_tables(self, tables: List[ScanTable], image: np.ndarray, table_path: image = Image.fromarray(image) os.makedirs(table_path, exist_ok=True) for table in tables: - images_cnt = len(os.listdir(table_path)) - image_path = os.path.join(table_path, f"{images_cnt:06d}.png") - jsons_path = os.path.join(table_path, f"{images_cnt:06d}.json") + file_name = str(int(time.time())) + image_path = os.path.join(table_path, f"{file_name}.png") + jsons_path = os.path.join(table_path, f"{file_name}.json") image.save(image_path) with open(jsons_path, "w") as out: json.dump(obj=table.to_dict(), fp=out, indent=4, ensure_ascii=False) diff --git a/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_utils/img_processing.py b/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_utils/img_processing.py index 4aad36fa..5671794c 100644 --- a/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_utils/img_processing.py +++ b/dedoc/readers/pdf_reader/pdf_image_reader/table_recognizer/table_utils/img_processing.py @@ -13,7 +13,6 @@ from dedoc.utils.parameter_utils import get_path_param logger = 
get_config().get("logger", logging.getLogger()) -logger = logger if logger else logging.getLogger("TableRecognizer.detect_tables_by_contours") table_options = TableTypeAdditionalOptions() ROTATE_THRESHOLD = 0.3 diff --git a/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_tabby_reader.py b/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_tabby_reader.py index 0edc191c..afa15d26 100644 --- a/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_tabby_reader.py +++ b/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_tabby_reader.py @@ -207,6 +207,7 @@ def __get_lines_with_location(self, page: dict, file_hash: str) -> List[LineWith lines = [] page_number, page_width, page_height = page["number"], int(page["width"]), int(page["height"]) prev_line = None + labeling_mode = self.config.get("labeling_mode", False) for block in page["blocks"]: annotations = [] @@ -219,9 +220,12 @@ def __get_lines_with_location(self, page: dict, file_hash: str) -> List[LineWith for annotation in block["annotations"]: start = annotation["start"] end = annotation["end"] - box = BBox(x_top_left=int(annotation["x_top_left"]), y_top_left=int(annotation["y_top_left"]), - width=int(annotation["width"]), height=int(annotation["height"])) - annotations.append(BBoxAnnotation(start, end, box, page_width=page_width, page_height=page_height)) + + if not labeling_mode: + box = BBox(x_top_left=int(annotation["x_top_left"]), y_top_left=int(annotation["y_top_left"]), + width=int(annotation["width"]), height=int(annotation["height"])) + annotations.append(BBoxAnnotation(start, end, box, page_width=page_width, page_height=page_height)) + annotations.append(SizeAnnotation(start, end, str(annotation["font_size"]))) annotations.append(StyleAnnotation(start, end, annotation["font_name"])) @@ -235,7 +239,7 @@ def __get_lines_with_location(self, page: dict, file_hash: str) -> List[LineWith annotations.append(LinkedTextAnnotation(start, end, annotation["url"])) bbox = BBox(x_top_left=int(block["x_top_left"]), 
y_top_left=int(block["y_top_left"]), width=int(block["width"]), height=int(block["height"])) - if self.config.get("labeling_mode", False): + if labeling_mode: annotations.append(BBoxAnnotation(0, len_block, bbox, page_width=page_width, page_height=page_height)) meta = block["metadata"].lower() diff --git a/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_txtlayer_reader.py b/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_txtlayer_reader.py index c0e99c43..86277bf0 100644 --- a/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_txtlayer_reader.py +++ b/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_txtlayer_reader.py @@ -1,4 +1,3 @@ -import os from typing import List, Optional, Tuple import numpy as np @@ -10,7 +9,6 @@ from dedoc.readers.pdf_reader.data_classes.tables.scantable import ScanTable from dedoc.readers.pdf_reader.pdf_base_reader import ParametersForParseDoc, PdfBaseReader from dedoc.readers.pdf_reader.pdf_txtlayer_reader.pdfminer_reader.pdfminer_extractor import PdfminerExtractor -from dedoc.train_dataset.train_dataset_utils import save_page_with_bbox from dedoc.utils.parameter_utils import get_param_pdf_with_txt_layer from dedoc.utils.utils import get_mime_extension @@ -66,9 +64,6 @@ def _process_one_page(self, lines = self.metadata_extractor.extract_metadata_and_set_annotations(page_with_lines=page, call_classifier=False) self.__change_table_boxes_page_width_heigth(pdf_width=page.pdf_page_width, pdf_height=page.pdf_page_height, tables=tables) - if self.config.get("labeling_mode"): - save_page_with_bbox(page=page, config=self.config, document_name=os.path.basename(path)) - return lines, tables, page.attachments, [] def __change_table_boxes_page_width_heigth(self, pdf_width: int, pdf_height: int, tables: List[ScanTable]) -> None: diff --git a/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdfminer_reader/pdfminer_extractor.py b/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdfminer_reader/pdfminer_extractor.py index 41196f00..f76c10d4 100644 --- 
a/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdfminer_reader/pdfminer_extractor.py +++ b/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdfminer_reader/pdfminer_extractor.py @@ -27,7 +27,8 @@ from dedoc.readers.pdf_reader.data_classes.pdf_image_attachment import PdfImageAttachment from dedoc.readers.pdf_reader.data_classes.tables.location import Location from dedoc.readers.pdf_reader.data_classes.text_with_bbox import TextWithBBox -from dedoc.readers.pdf_reader.pdf_txtlayer_reader.pdfminer_reader.pdfminer_utils import cleaning_text_from_hieroglyphics, create_bbox, draw_annotation +from dedoc.readers.pdf_reader.data_classes.word_with_bbox import WordWithBBox +from dedoc.readers.pdf_reader.pdf_txtlayer_reader.pdfminer_reader.pdfminer_utils import create_bbox, draw_annotation from dedoc.utils.parameter_utils import get_path_param from dedoc.utils.pdf_utils import get_page_image @@ -37,11 +38,10 @@ class PdfminerExtractor(object): """ - Class extarcts text with style from pdf with help pdfminer.six + Class extracts text with style from pdf with help pdfminer.six """ def __init__(self, *, config: dict) -> None: - super().__init__() self.config = config self.logger = self.config.get("logger", logging.getLogger()) @@ -151,18 +151,23 @@ def __get_interpreter(self) -> Tuple[PDFPageAggregator, PDFPageInterpreter]: def get_info_layout_object(self, lobj: LTContainer, page_num: int, line_num: int, k_w: float, k_h: float, height: int, width: int) -> TextWithBBox: # 1 - converting coordinate from pdf format into image bbox = create_bbox(height, k_h, k_w, lobj) + # 2 - extract text and text annotations from current object - text = "" annotations = [] + words = [] if isinstance(lobj, LTTextLineHorizontal): - # cleaning text from (cid: *) - text = cleaning_text_from_hieroglyphics(lobj.get_text()) # get line's annotations - annotations = self.__get_line_annotations(lobj, k_w, k_h, height, width) + annotations, words = self.__get_line_annotations(lobj, height, width) + + 
text_with_bbox = TextWithBBox(bbox=bbox, page_num=page_num, words=words, line_num=line_num, annotations=annotations) - return TextWithBBox(bbox=bbox, page_num=page_num, text=text, line_num=line_num, annotations=annotations) + if self.config.get("labeling_mode", False): # add bbox for a textual line + bbox = create_bbox(height=height, k_h=1.0, k_w=1.0, lobj=lobj) + text_with_bbox.annotations.append(BBoxAnnotation(start=0, end=len(text_with_bbox.text), value=bbox, page_width=width, page_height=height)) - def __get_line_annotations(self, lobj: LTTextLineHorizontal, k_w: float, k_h: float, height: int, width: int) -> List[Annotation]: + return text_with_bbox + + def __get_line_annotations(self, lobj: LTTextLineHorizontal, height: int, width: int) -> Tuple[List[Annotation], List[WordWithBBox]]: # 1 - prepare data for group by name chars_with_style = [] rand_weight = self._get_new_weight() @@ -187,18 +192,20 @@ def __get_line_annotations(self, lobj: LTTextLineHorizontal, k_w: float, k_h: fl # duplicated previous style chars_with_style.append(chars_with_style[-1]) - annotations = self.__extract_words_bbox_annotation(lobj, height, width) + annotations, words = self.__extract_words_bbox_annotation(lobj, height, width) + if self.config.get("labeling_mode", False): + annotations = [] # ignore bboxes of words + # 3 - extract range from chars_with_style array char_pointer = 0 - for key, group in itertools.groupby(chars_with_style, lambda x: x): count_chars = len(list(group)) annotations.extend(self.__parse_style_string(key, char_pointer, char_pointer + count_chars - 1)) char_pointer += count_chars - return annotations + return annotations, words - def __extract_words_bbox_annotation(self, lobj: LTTextContainer, height: int, width: int) -> List[Annotation]: + def __extract_words_bbox_annotation(self, lobj: LTTextContainer, height: int, width: int) -> Tuple[List[Annotation], List[WordWithBBox]]: words: List[WordObj] = [] word: WordObj = WordObj(start=0, end=0, 
value=LTTextContainer()) if isinstance(lobj, LTTextLineHorizontal): @@ -214,14 +221,13 @@ def __extract_words_bbox_annotation(self, lobj: LTTextContainer, height: int, wi words.append(word) word = WordObj(start=item + 1, end=item + 1, value=LTTextContainer()) - annotations = [ - BBoxAnnotation(start=word.start, - end=word.end, - value=create_bbox(height=height, k_h=1.0, k_w=1.0, lobj=word.value), - page_width=width, - page_height=height) for word in words - ] - return annotations + annotations, words_with_bbox = [], [] + for word in words: + word_bbox = create_bbox(height=height, k_h=1.0, k_w=1.0, lobj=word.value) + annotations.append(BBoxAnnotation(start=word.start, end=word.end, value=word_bbox, page_width=width, page_height=height)) + words_with_bbox.append(WordWithBBox(text=word.value.get_text(), bbox=word_bbox)) + + return annotations, words_with_bbox def _get_new_weight(self) -> str: return binascii.hexlify(os.urandom(8)).decode("ascii") @@ -269,10 +275,12 @@ def __debug_extract_layout(self, image_src: np.ndarray, layout: LTContainer, pag for lobj in lobjs: if isinstance(lobj, LTTextBoxHorizontal): - annotations.extend(self.__extract_words_bbox_annotation(lobj, height, width)) + lobj_annotations, _ = self.__extract_words_bbox_annotation(lobj, height, width) + annotations.extend(lobj_annotations) lobjs_textline.extend(lobj) elif isinstance(lobj, LTTextLineHorizontal): - annotations.extend(self.__extract_words_bbox_annotation(lobj, height, width)) + lobj_annotations, _ = self.__extract_words_bbox_annotation(lobj, height, width) + annotations.extend(lobj_annotations) lobjs_textline.append(lobj) elif isinstance(lobj, LTRect): lobjs_box.append(lobj) diff --git a/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdfminer_reader/pdfminer_utils.py b/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdfminer_reader/pdfminer_utils.py index a9361cda..1295820d 100644 --- a/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdfminer_reader/pdfminer_utils.py +++ 
b/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdfminer_reader/pdfminer_utils.py @@ -1,4 +1,3 @@ -import re from typing import IO, List, Match, Optional, Tuple import cv2 @@ -57,15 +56,6 @@ def create_bbox(height: int, k_h: float, k_w: float, lobj: LTContainer) -> BBox: return bbox -def cleaning_text_from_hieroglyphics(text_str: str) -> str: - """ - replace all cid-codecs into ascii symbols. cid-encoding - hieroglyphic fonts - :param text_str: text - :return: text wo cids-chars - """ - return re.sub(r"\(cid:(\d)*\)", cid_to_ascii_text, text_str) - - def cid_to_ascii_text(m: Match) -> str: v = m.group(0) v = v.strip("(").strip(")") diff --git a/dedoc/structure_extractors/concrete_structure_extractors/default_structure_extractor.py b/dedoc/structure_extractors/concrete_structure_extractors/default_structure_extractor.py index 92a39fb0..9ce18e0e 100644 --- a/dedoc/structure_extractors/concrete_structure_extractors/default_structure_extractor.py +++ b/dedoc/structure_extractors/concrete_structure_extractors/default_structure_extractor.py @@ -31,7 +31,8 @@ def extract(self, document: UnstructuredDocument, parameters: Optional[dict] = N previous_line = None for line in document.lines: - assert line.metadata.tag_hierarchy_level is not None + if line.metadata.tag_hierarchy_level is None: + line.metadata.tag_hierarchy_level = HierarchyLevel.create_unknown() if line.metadata.tag_hierarchy_level.line_type == HierarchyLevel.unknown: line.metadata.hierarchy_level = self.get_list_hl_with_regexp(line, previous_line) diff --git a/dedoc/structure_extractors/concrete_structure_extractors/tz_structure_extractor.py b/dedoc/structure_extractors/concrete_structure_extractors/tz_structure_extractor.py index 4c9c0993..b48efa57 100644 --- a/dedoc/structure_extractors/concrete_structure_extractors/tz_structure_extractor.py +++ b/dedoc/structure_extractors/concrete_structure_extractors/tz_structure_extractor.py @@ -6,10 +6,10 @@ from dedoc.extensions import recognized_mimes from 
dedoc.structure_extractors.abstract_structure_extractor import AbstractStructureExtractor from dedoc.structure_extractors.feature_extractors.list_features.prefix.bullet_prefix import BulletPrefix -from dedoc.structure_extractors.feature_extractors.tz_feature_extractor import TzTextFeatures from dedoc.structure_extractors.hierarchy_level_builders.header_builder.header_hierarchy_level_builder import HeaderHierarchyLevelBuilder from dedoc.structure_extractors.hierarchy_level_builders.toc_builder.toc_builder import TocBuilder from dedoc.structure_extractors.hierarchy_level_builders.tz_builder.body_builder import TzBodyBuilder +from dedoc.structure_extractors.hierarchy_level_builders.utils_reg import regexps_ends_of_number, regexps_number, regexps_subitem from dedoc.structure_extractors.line_type_classifiers.tz_classifier import TzLineTypeClassifier @@ -70,6 +70,6 @@ def extract(self, document: UnstructuredDocument, parameters: Optional[dict] = N document.lines = self._postprocess(lines=header_lines + toc_lines + body_lines, paragraph_type=["item"], - regexps=[BulletPrefix.regexp, TzTextFeatures.number_regexp, TzTextFeatures.item_regexp], - excluding_regexps=[None, TzTextFeatures.ends_of_number, TzTextFeatures.ends_of_number]) + regexps=[BulletPrefix.regexp, regexps_number, regexps_subitem], + excluding_regexps=[None, regexps_ends_of_number, regexps_ends_of_number]) return document diff --git a/dedoc/structure_extractors/feature_extractors/abstract_extractor.py b/dedoc/structure_extractors/feature_extractors/abstract_extractor.py index 49d11bc4..39f36de8 100644 --- a/dedoc/structure_extractors/feature_extractors/abstract_extractor.py +++ b/dedoc/structure_extractors/feature_extractors/abstract_extractor.py @@ -14,39 +14,79 @@ from dedoc.data_structures.concrete_annotations.spacing_annotation import SpacingAnnotation from dedoc.data_structures.concrete_annotations.underlined_annotation import UnderlinedAnnotation from dedoc.data_structures.line_with_meta import 
LineWithMeta -from dedoc.structure_extractors.hierarchy_level_builders.utils_reg import regexps_ends_of_number, regexps_number, regexps_subitem, regexps_subitem_extended, \ - regexps_year +from dedoc.structure_extractors.hierarchy_level_builders.utils_reg import regexps_ends_of_number, regexps_number from dedoc.utils.utils import list_get class AbstractFeatureExtractor(ABC): - number_regexp = regexps_number - ends_of_number = regexps_ends_of_number - item_regexp = regexps_subitem - item_extended_regexp = regexps_subitem_extended - year_regexp = regexps_year @abstractmethod def parameters(self) -> dict: + """ + Returns the dictionary with parameters for the `__init__` method of the feature extractor. + """ pass @abstractmethod def fit(self, documents: List[List[LineWithMeta]], y: Optional[List[str]] = None) -> "AbstractFeatureExtractor": + """ + :param documents: list of documents in form of list of lines + :param y: the list of labels + :returns: feature extractor (it may be fitted if needed) + """ pass @abstractmethod def transform(self, documents: List[List[LineWithMeta]], y: Optional[List[str]] = None) -> pd.DataFrame: + """ + Extract a list of float features for each document line. + + :param documents: list of documents in form of list of lines + :param y: the list of labels + :returns: matrix of features extracted for the document + """ pass def fit_transform(self, documents: List[List[LineWithMeta]], y: Optional[List[str]] = None) -> pd.DataFrame: + """ + :param documents: list of documents in form of list of lines + :param y: the list of labels + :returns: matrix of features extracted for the document + """ self.fit(documents, y) return self.transform(documents) + def prev_next_line_features(self, matrix: pd.DataFrame, n_prev: int, n_next: int) -> pd.DataFrame: + """ + Add features of previous and next lines to the input feature matrix. + For each document line, `n_prev` features of previous lines and `n_next` features of next lines will be added. 
+ For lines that don't have previous/next lines, zeros are added as features + + :param matrix: matrix where rows are lines, columns are features + :param n_prev: the number of previous lines whose features are added to the current line's features + :param n_next: the number of next lines whose features are added to the current line's features + :return: feature matrix with added features + """ + feature_names = matrix.columns + prev_line_features = [ + pd.DataFrame(data=self._prev_line_features(matrix.values, i), columns=self.__create_features_name(feature_names, "prev", i)) + for i in range(1, n_prev + 1) + ] + next_line_features = [ + pd.DataFrame(data=self._next_line_features(matrix.values, i), columns=self.__create_features_name(feature_names, "next", i)) + for i in range(1, n_next + 1) + ] + + matrices = [matrix] + prev_line_features + next_line_features + result_matrix = pd.concat(matrices, axis=1) + return result_matrix + @staticmethod def _prev_line_features(feature_matrix: np.ndarray, n: int) -> np.ndarray: """ - @param feature_matrix: matrix where rows are lines, columns are features - @return: feature matrix where lines were shifted on n lines + :param feature_matrix: matrix where rows are lines, columns are features + :param n: the number of previous lines whose features are added to the current line's features + :return: feature matrix where lines were shifted on n lines downwards (feature matrix for n previous lines, for the first n lines zeros are added) """ if n > feature_matrix.shape[0]: return np.zeros(feature_matrix.shape) @@ -54,46 +94,39 @@ def _prev_line_features(feature_matrix: np.ndarray, n: int) -> np.ndarray: @staticmethod def _next_line_features(feature_matrix: np.ndarray, n: int) -> np.ndarray: + """ + :param feature_matrix: matrix where rows are lines, columns are features + :param n: the number of next lines whose features are added to the current line's features + :return: feature matrix where lines
were shifted on n lines upwards (feature matrix for n next lines, for the last n lines zeros are added) + """ if n > feature_matrix.shape[0]: return np.zeros(feature_matrix.shape) return np.vstack((feature_matrix[n:, :], np.zeros((n, feature_matrix.shape[1])))) - @staticmethod - def _create_features_name(old_names: pd.Index, which: str, num: int) -> List[str]: + def __create_features_name(self, old_names: pd.Index, which: str, num: int) -> List[str]: return [f"{old_name}_{which}_{num}" for old_name in old_names] - def prev_next_line_features(self, matrix: pd.DataFrame, n_prev: int, n_next: int) -> pd.DataFrame: - """ - add previous and next features with their names + def _start_regexp(self, line: str, regexps: List[Pattern], suffix: Optional[str] = None) -> Iterable[Tuple[str, float]]: """ - feature_names = matrix.columns - prev_line_features = [ - pd.DataFrame(data=self._prev_line_features(matrix.values, i), columns=self._create_features_name(feature_names, "prev", i)) - for i in range(1, n_prev + 1) - ] - next_line_features = [ - pd.DataFrame(data=self._next_line_features(matrix.values, i), columns=self._create_features_name(feature_names, "next", i)) - for i in range(1, n_next + 1) - ] + Apply regular expressions to the given line, calculate the length of the match and number of matches in the line - matrices = [matrix] + prev_line_features + next_line_features - result_matrix = pd.concat(matrices, axis=1) - return result_matrix - - def _start_regexp(self, line: str, regexps: List[Pattern], suffix: Optional[str] = None) -> Iterable[Tuple[str, float]]: + :param line: document line + :param regexps: list of regular expressions that will be applied to the line + :param suffix: the additional info for feature naming + :return: feature name + value, values are length of the match and number of matches + """ matches = 0 text = line.strip() for i, pattern in enumerate(regexps): # list patterns - if suffix is None: - feature_name = f"start_regexp_{i}" - else: - 
feature_name = f"start_regexp_{i}_{suffix}" + feature_name = f"start_regexp_{i}" if suffix is None else f"start_regexp_{i}_{suffix}" + match = pattern.match(text) if match is not None and match.end() > 0: matches += 1 yield feature_name, match.end() - match.start() else: yield feature_name, 0 + if suffix is None: yield "start_regexp_num_matches", matches else: @@ -101,16 +134,28 @@ def _start_regexp(self, line: str, regexps: List[Pattern], suffix: Optional[str] @staticmethod def _get_size(line: LineWithMeta) -> float: + """ + :param line: document line + :return: font size value + """ sizes = [annotation for annotation in line.annotations if annotation.name == SizeAnnotation.name] # font size return float(sizes[0].value) if len(sizes) > 0 else 0. @staticmethod def _get_bold(line: LineWithMeta) -> float: + """ + :param line: document line + :return: indicator if there are some parts of the line bold + """ bold = [annotation for annotation in line.annotations if annotation.name == BoldAnnotation.name and annotation.value == "True"] return 1. if len(bold) > 0 else 0 @staticmethod def _get_bold_percent(line: LineWithMeta) -> float: + """ + :param line: document line + :return: the percent of bold characters in a line + """ bold_character_number = sum([ annotation.end - annotation.start for annotation in line.annotations if annotation.name == BoldAnnotation.name and annotation.value == "True" ]) @@ -120,6 +165,10 @@ def _get_bold_percent(line: LineWithMeta) -> float: @staticmethod def _is_first_bold(line: LineWithMeta) -> float: + """ + :param line: document line + :return: indicator if the beginning of the line is bold + """ for annotation in line.annotations: if annotation.name == BoldAnnotation.name and annotation.value == "True" and annotation.start == 0: return 1. 
@@ -127,16 +176,28 @@ def _is_first_bold(line: LineWithMeta) -> float: @staticmethod def _get_italic(line: LineWithMeta) -> float: + """ + :param line: document line + :return: indicator if there are some parts of the line italic + """ italic = [annotation for annotation in line.annotations if annotation.name == ItalicAnnotation.name] return 1. if len(italic) > 0 else 0 @staticmethod def _get_underlined(line: LineWithMeta) -> float: + """ + :param line: document line + :return: indicator if there are some parts of the line underlined + """ underlined = [annotation for annotation in line.annotations if annotation.name == UnderlinedAnnotation.name] return 1. if len(underlined) > 0 else 0 @staticmethod def _get_color(line: LineWithMeta) -> Iterable[Tuple[str, float]]: + """ + :param line: document line + :return: color values (R,G,B+color dispersion) with features names + """ color = [annotation for annotation in line.annotations if annotation.name == ColorAnnotation.name] if len(color) == 0: value = {} @@ -153,21 +214,32 @@ def _get_color(line: LineWithMeta) -> Iterable[Tuple[str, float]]: @staticmethod def _get_spacing(line: LineWithMeta) -> float: + """ + :param line: document line + :return: spacing value (distance between two lines) + """ spacing = [annotation for annotation in line.annotations if annotation.name == SpacingAnnotation.name] return float(spacing[0].value) if len(spacing) > 0 else -1 @staticmethod def _get_indentation(line: LineWithMeta) -> float: + """ + :param line: document line + :return: indentation value (distance between line and document margin) + """ indentation = [annotation for annotation in line.annotations if isinstance(annotation, IndentationAnnotation)] return float(indentation[0].value) if len(indentation) > 0 else 0 @staticmethod def _can_be_prev_element(this_item: Optional[str], prev_item: Optional[str]) -> bool: """ - check if prev_item can be the previous element of this item in the correct list - For example "2" can be previous 
element of "3", "2.1." can be previous element of "2.1.1" - If prev_item is None then this_item is the first item in the list and should be 1 - @return: + Check if `prev_item` can be the previous element of `this_item` in the correct list + For example, "2" can be previous element of "3", "2.1." can be previous element of "2.1.1" + If `prev_item` is None then `this_item` is the first item in the list and should be 1 + + :param this_item: text of the current list item + :param prev_item: text of the previous list item + :return: True if `prev_item` can be the previous element of `this_item` """ if this_item is None: return False @@ -191,11 +263,14 @@ def _can_be_prev_element(this_item: Optional[str], prev_item: Optional[str]) -> def _before_special_line(self, document: List[LineWithMeta], find_special_line: method) -> List[float]: """ - @param document: document in form of list of lines - @param find_special_line: function that find some special line, see __find_toc for example - @return: list of distances from given line to first special line. If line lay before the special line, then - distance is negative, if line lay after special line then distance is positive, for special line distance is 0 - If there is no special line in the document then distance is 0 for every line + Find "distance" to the closest special line in the document. + If line is located before the special line, then distance is negative, if line is after special line then distance is positive, + for special line distance is 0. + If there is no special line in the document then distance is 0 for every line. 
+ + :param document: document in form of list of lines + :param find_special_line: function that finds some special line, see `__find_toc` for example + :return: list of distances from given line to first special line """ result = [] special_line_position = find_special_line(document) @@ -207,14 +282,24 @@ def _before_special_line(self, document: List[LineWithMeta], find_special_line: result.append(line_id - special_line_id) return result - def _list_features(self, lines: List[LineWithMeta]) -> List[float]: + def _list_features(self, lines: List[LineWithMeta], list_item_regexp: Pattern = regexps_number, end_regexp: Pattern = regexps_ends_of_number) \ + -> List[float]: + """ + For each line, the indicator is computed if the line is a list item. + The indicator is obtained using regular expressions and checking, if list items form the correct list. + + :param lines: list of document lines + :param list_item_regexp: regular expression for a list item + :param end_regexp: regular expression for a text after the list item + :return: list of indicators if a line is a list item + """ previous_ids = [-1] texts = [line.line.strip().strip() for line in lines] - matches = map(self.number_regexp.match, texts) + matches = map(list_item_regexp.match, texts) numbers = [(line_id, match.group().strip()) for line_id, match in enumerate(matches) if match] for num, (line_id, number) in enumerate(numbers): - searched = self.ends_of_number.search(number) + searched = end_regexp.search(number) if searched is not None: numbers[num] = (line_id, number[:searched.start()]) if number.endswith((")", "}", ".")): @@ -241,8 +326,8 @@ def _list_features(self, lines: List[LineWithMeta]) -> List[float]: @staticmethod def _get_features_quantile(feature_column: pd.Series) -> pd.Series: """ - @param feature_column: column with one feature - @return: column with feature's quantiles + :param feature_column: column with one feature + :return: column with feature's quantiles """ feature_column = 
feature_column.fillna(feature_column.min() - 1) feature_column_s = np.sort(feature_column) @@ -255,7 +340,9 @@ def _get_features_quantile(feature_column: pd.Series) -> pd.Series: def _normalize_features(feature_column: pd.Series) -> pd.Series: """ new_feature = (feature - mean) / (max - min) if max = min else 0 - Output: normalized feature vector [-1; 1] + + :param feature_column: column with one feature + :return: normalized feature vector [-1; 1] """ feature_mean, feature_min, feature_max = feature_column.mean(), feature_column.min(), feature_column.max() new_feature_column = (feature_column - feature_mean) / (feature_max - feature_min) if feature_max - feature_min != 0.0 else 0.0 diff --git a/dedoc/structure_extractors/feature_extractors/law_text_features.py b/dedoc/structure_extractors/feature_extractors/law_text_features.py index 333af89a..8969af04 100644 --- a/dedoc/structure_extractors/feature_extractors/law_text_features.py +++ b/dedoc/structure_extractors/feature_extractors/law_text_features.py @@ -8,6 +8,7 @@ from dedoc.structure_extractors.feature_extractors.abstract_extractor import AbstractFeatureExtractor from dedoc.structure_extractors.feature_extractors.list_features.list_features_extractor import ListFeaturesExtractor from dedoc.structure_extractors.feature_extractors.utils_feature_extractor import normalization_by_min_max +from dedoc.structure_extractors.hierarchy_level_builders.utils_reg import regexps_year class LawTextFeatures(AbstractFeatureExtractor): @@ -124,7 +125,7 @@ def _one_line_features(self, line: LineWithMeta, total_lines: int, start_page: i yield "font_size", self._get_size(line) yield "line_id", normalization_by_min_max(line.metadata.line_id, min_v=0, max_v=total_lines) - yield "num_year_regexp", len(self.year_regexp.findall(line.line)) + yield "num_year_regexp", len(regexps_year.findall(line.line)) yield "endswith_dot", float(line.line.strip().endswith(".")) yield "endswith_semicolon", float(line.line.strip().endswith(";")) yield 
"endswith_colon", float(line.line.strip().endswith(":")) diff --git a/dedoc/structure_extractors/feature_extractors/list_features/list_features_extractor.py b/dedoc/structure_extractors/feature_extractors/list_features/list_features_extractor.py index 7a048bb1..f424a612 100644 --- a/dedoc/structure_extractors/feature_extractors/list_features/list_features_extractor.py +++ b/dedoc/structure_extractors/feature_extractors/list_features/list_features_extractor.py @@ -22,6 +22,13 @@ def __init__(self, indent_std: float, prefix_before: List[LinePrefix], prefix_af class ListFeaturesExtractor(AbstractFeatureExtractor): + """ + Extracts features for list items: + - indentation of items is analysed (same/different) + - prefixes of items are analysed, if items can be predecessors of others + + The analysis is executed in a window of a fixed size (size of window = number of neighbor lines) + """ def __init__(self, window_size: int = 25, prefix_list: Optional[List[LinePrefix]] = None) -> None: super().__init__() @@ -43,9 +50,9 @@ def one_document(self, doc: List[LineWithMeta]) -> Tuple[List[LinePrefix], pd.Da indents = np.array([prefix.indent for prefix in prefixes]) res = [] doc_size = len(prefixes) - for line_id, (line, prefix) in enumerate(zip(doc, prefixes)): + for line_id, (_line, prefix) in enumerate(zip(doc, prefixes)): window = self._get_window(indents=indents, prefixes=prefixes, line_id=line_id, doc_size=doc_size) - features = self._one_line_features(line=line, line_id=line_id, window=window, prefix=prefix) + features = self._one_line_features(window=window, prefix=prefix) res.append(features) features_dict = defaultdict(list) for features in res: @@ -53,7 +60,7 @@ def one_document(self, doc: List[LineWithMeta]) -> Tuple[List[LinePrefix], pd.Da features_dict[feature_name].append(feature_value) return prefixes, pd.DataFrame(features_dict) - def _one_line_features(self, line: LineWithMeta, prefix: LinePrefix, line_id: int, window: Window) -> Dict[str, float]: + def 
_one_line_features(self, prefix: LinePrefix, window: Window) -> Dict[str, float]: predecessor_num = 0 predecessor_num_same_indent = 0 same_indent = 0 diff --git a/dedoc/structure_extractors/feature_extractors/list_features/prefix/prefix.py b/dedoc/structure_extractors/feature_extractors/list_features/prefix/prefix.py index 5869d086..56bca3ac 100644 --- a/dedoc/structure_extractors/feature_extractors/list_features/prefix/prefix.py +++ b/dedoc/structure_extractors/feature_extractors/list_features/prefix/prefix.py @@ -56,7 +56,7 @@ def predecessor(self, other: "LinePrefix") -> bool: >>> this_one.predecessor(other_one) True - :param other: other prefix + :param other: prefix of other line :return: true if other is valid predecessor for this one, false otherwise """ pass @@ -68,10 +68,8 @@ def successor(self, other: "LinePrefix") -> bool: @abc.abstractmethod def is_valid(prefix_str: str) -> bool: """ - returns true if prefix_str is valid for this type of prefix, false otherwise. - :param prefix_str: the string representation of the prefix - For example "1." is valid for DottedPrefix - :return: + :param prefix_str: the string representation of the prefix. For example "1." is valid for DottedPrefix + :return: true if prefix_str is valid for this type of prefix, false otherwise. 
""" pass diff --git a/dedoc/structure_extractors/feature_extractors/list_features/prefix/roman_prefix.py b/dedoc/structure_extractors/feature_extractors/list_features/prefix/roman_prefix.py index cca57f89..e30c6a44 100644 --- a/dedoc/structure_extractors/feature_extractors/list_features/prefix/roman_prefix.py +++ b/dedoc/structure_extractors/feature_extractors/list_features/prefix/roman_prefix.py @@ -28,6 +28,7 @@ def predecessor(self, other: "LinePrefix") -> bool: @staticmethod def is_valid(prefix_str: str) -> bool: + prefix_str = prefix_str.lower() if len(prefix_str) <= 1 or not prefix_str.endswith("."): return False prefix_set = set(prefix_str[:-1]) diff --git a/dedoc/structure_extractors/feature_extractors/tz_feature_extractor.py b/dedoc/structure_extractors/feature_extractors/tz_feature_extractor.py index fce1cef1..76c77f37 100644 --- a/dedoc/structure_extractors/feature_extractors/tz_feature_extractor.py +++ b/dedoc/structure_extractors/feature_extractors/tz_feature_extractor.py @@ -10,6 +10,7 @@ from dedoc.structure_extractors.feature_extractors.list_features.prefix.bullet_prefix import BulletPrefix from dedoc.structure_extractors.feature_extractors.toc_feature_extractor import TOCFeatureExtractor from dedoc.structure_extractors.feature_extractors.utils_feature_extractor import normalization_by_min_max +from dedoc.structure_extractors.hierarchy_level_builders.utils_reg import regexps_number, regexps_subitem_extended, regexps_year from dedoc.utils.utils import flatten @@ -21,7 +22,7 @@ class TzTextFeatures(AbstractFeatureExtractor): def __init__(self, text_features_only: bool = False) -> None: super().__init__() self.list_item_regexp = [ - self.item_extended_regexp, + regexps_subitem_extended, re.compile(r"^\s*[IVX]+"), # roman numerals # https://stackoverflow.com/questions/267399/how-do-you-match-only-valid- # roman-numerals-with-a-regular-expression @@ -85,9 +86,9 @@ def _one_line_features(self, line: LineWithMeta, total_lines: int, start_page: i yield 
("is_upper", 1) if line.line.strip().isupper() else ("is_upper", 0) yield ("is_lower", 1) if line.line.strip().islower() else ("is_lower", 0) yield "day_month_regexp", int("дней" in text) + int("месяцев" in text) - yield "year_regexp", len(self.year_regexp.findall(text)) + yield "year_regexp", len(regexps_year.findall(text)) - number = self.number_regexp.match(text) + number = regexps_number.match(text) number = number.group().strip() if number else "" if number.endswith((")", "}")): number = number[:-1] diff --git a/dedoc/structure_extractors/hierarchy_level_builders/law_builders/body_builder/abstract_body_hierarchy_level_builder.py b/dedoc/structure_extractors/hierarchy_level_builders/law_builders/body_builder/abstract_body_hierarchy_level_builder.py index 95fb9b05..f7efcbae 100644 --- a/dedoc/structure_extractors/hierarchy_level_builders/law_builders/body_builder/abstract_body_hierarchy_level_builder.py +++ b/dedoc/structure_extractors/hierarchy_level_builders/law_builders/body_builder/abstract_body_hierarchy_level_builder.py @@ -6,18 +6,17 @@ from dedoc.data_structures.hierarchy_level import HierarchyLevel from dedoc.data_structures.line_metadata import LineMetadata from dedoc.data_structures.line_with_meta import LineWithMeta -from dedoc.structure_extractors.feature_extractors.abstract_extractor import AbstractFeatureExtractor from dedoc.structure_extractors.feature_extractors.law_text_features import LawTextFeatures from dedoc.structure_extractors.hierarchy_level_builders.abstract_hierarchy_level_builder import AbstractHierarchyLevelBuilder from dedoc.structure_extractors.hierarchy_level_builders.law_builders.structure_unit.abstract_structure_unit import AbstractStructureUnit -from dedoc.structure_extractors.hierarchy_level_builders.utils_reg import regexps_item_with_bracket, regexps_subitem +from dedoc.structure_extractors.hierarchy_level_builders.utils_reg import regexps_ends_of_number, regexps_item_with_bracket, regexps_number, regexps_subitem class 
AbstractBodyHierarchyLevelBuilder(AbstractHierarchyLevelBuilder, abc.ABC): starting_line_types = ["body"] regexps_item = regexps_item_with_bracket - regexps_part = AbstractFeatureExtractor.number_regexp - ends_of_number = AbstractFeatureExtractor.ends_of_number + regexps_part = regexps_number + ends_of_number = regexps_ends_of_number regexps_subitem = regexps_subitem @property diff --git a/dedoc/structure_extractors/hierarchy_level_builders/law_builders/structure_unit/law_structure_unit.py b/dedoc/structure_extractors/hierarchy_level_builders/law_builders/structure_unit/law_structure_unit.py index 313fde91..e9a64a35 100644 --- a/dedoc/structure_extractors/hierarchy_level_builders/law_builders/structure_unit/law_structure_unit.py +++ b/dedoc/structure_extractors/hierarchy_level_builders/law_builders/structure_unit/law_structure_unit.py @@ -1,17 +1,16 @@ from typing import Optional, Tuple from dedoc.data_structures.hierarchy_level import HierarchyLevel -from dedoc.structure_extractors.feature_extractors.abstract_extractor import AbstractFeatureExtractor from dedoc.structure_extractors.feature_extractors.law_text_features import LawTextFeatures from dedoc.structure_extractors.hierarchy_level_builders.law_builders.structure_unit.abstract_structure_unit import AbstractStructureUnit -from dedoc.structure_extractors.hierarchy_level_builders.utils_reg import regexps_foiv_item, regexps_item_with_bracket, regexps_subitem +from dedoc.structure_extractors.hierarchy_level_builders.utils_reg import regexps_ends_of_number, regexps_foiv_item, regexps_item_with_bracket, regexps_subitem class LawStructureUnitBuilder(AbstractStructureUnit): document_types = ["law"] regexps_item_with_bracket = regexps_item_with_bracket regexps_part = regexps_foiv_item - ends_of_number = AbstractFeatureExtractor.ends_of_number + ends_of_number = regexps_ends_of_number regexps_subitem = regexps_subitem def structure_unit(self, text: str, init_hl_depth: int, previous_hl: Optional[HierarchyLevel]) -> 
Tuple[HierarchyLevel, Optional[HierarchyLevel]]: diff --git a/dedoc/structure_extractors/hierarchy_level_builders/tz_builder/body_builder.py b/dedoc/structure_extractors/hierarchy_level_builders/tz_builder/body_builder.py index a94b0d2b..51730082 100644 --- a/dedoc/structure_extractors/hierarchy_level_builders/tz_builder/body_builder.py +++ b/dedoc/structure_extractors/hierarchy_level_builders/tz_builder/body_builder.py @@ -7,6 +7,7 @@ from dedoc.structure_extractors.hierarchy_level_builders.abstract_hierarchy_level_builder import AbstractHierarchyLevelBuilder from dedoc.structure_extractors.hierarchy_level_builders.law_builders.body_builder.abstract_body_hierarchy_level_builder import \ AbstractBodyHierarchyLevelBuilder +from dedoc.structure_extractors.hierarchy_level_builders.utils_reg import regexps_number, regexps_subitem class TzBodyBuilder(AbstractHierarchyLevelBuilder): @@ -46,14 +47,14 @@ def __handle_item(self, init_hl_depth: int, line: LineWithMeta, prediction: str, hierarchy_level = HierarchyLevel(item_min_depth + 2, 1, False, prediction) else: hierarchy_level = HierarchyLevel(item_min_depth + 2, 0, False, prediction) - elif TzTextFeatures.number_regexp.match(text): - match = TzTextFeatures.number_regexp.match(text) + elif regexps_number.match(text): + match = regexps_number.match(text) number = text[match.start(): match.end()] number_splitted = [n for n in number.strip().split(".") if n.isnumeric()] hierarchy_level = HierarchyLevel(item_min_depth + 3, len(number_splitted), False, prediction) elif BulletPrefix.regexp.match(text): hierarchy_level = HierarchyLevel(item_min_depth + 4, 0, False, prediction) - elif TzTextFeatures.item_regexp.match(text): + elif regexps_subitem.match(text): hierarchy_level = HierarchyLevel(item_min_depth + 4, 0, False, prediction) else: hierarchy_level = HierarchyLevel.create_raw_text() diff --git a/dedoc/structure_extractors/line_type_classifiers/abstract_line_type_classifier.py 
b/dedoc/structure_extractors/line_type_classifiers/abstract_line_type_classifier.py index 906eab5e..64e289b3 100644 --- a/dedoc/structure_extractors/line_type_classifiers/abstract_line_type_classifier.py +++ b/dedoc/structure_extractors/line_type_classifiers/abstract_line_type_classifier.py @@ -1,29 +1,23 @@ import abc -from typing import List +from typing import List, Optional from dedoc.data_structures.line_with_meta import LineWithMeta class AbstractLineTypeClassifier(abc.ABC): - - document_type = None - - def __init__(self, *, config: dict) -> None: - self.config = config - self.chunk_start_tags = ["header", "body"] - self.hl_type = "" - self._chunk_hl_builders = [] + """ + Abstract class for lines classification with predict method. + """ + def __init__(self, *, config: Optional[dict] = None) -> None: + self.config = {} if config is None else config @abc.abstractmethod def predict(self, lines: List[LineWithMeta]) -> List[str]: """ - :param lines: image and bboxes with text, it is useful for feature extraction and label predictions - :return: lines with metadata and predicted labels and hierarchy levels + Predict the line type according to some domain. + For this purpose, some pretrained classifier may be used. 
+ + :param lines: list of document lines + :return: list of predicted labels for each line """ pass - - def get_chunk_start_tags(self) -> List[str]: - return self.chunk_start_tags - - def get_hl_type(self) -> str: - return self.hl_type diff --git a/dedoc/structure_extractors/line_type_classifiers/abstract_pickled_classifier.py b/dedoc/structure_extractors/line_type_classifiers/abstract_pickled_classifier.py index 1e04c98c..e5e62aef 100644 --- a/dedoc/structure_extractors/line_type_classifiers/abstract_pickled_classifier.py +++ b/dedoc/structure_extractors/line_type_classifiers/abstract_pickled_classifier.py @@ -3,23 +3,33 @@ import os import pickle from abc import ABC -from typing import Tuple +from typing import Optional, Tuple from xgboost import XGBClassifier - from dedoc.download_models import download_from_hub from dedoc.structure_extractors.line_type_classifiers.abstract_line_type_classifier import AbstractLineTypeClassifier from dedoc.utils.parameter_utils import get_param_gpu_available class AbstractPickledLineTypeClassifier(AbstractLineTypeClassifier, ABC): + """ + Abstract class for lines classification with functionality of loading and saving a classifier. + """ - def __init__(self, *, config: dict) -> None: + def __init__(self, *, config: Optional[dict] = None) -> None: super().__init__(config=config) self.logger = self.config.get("logger", logging.getLogger()) def load(self, classifier_type: str, path: str) -> Tuple[XGBClassifier, dict]: + """ + Load the pickled classifier with parameters for a feature extractor. 
+ + :param classifier_type: name of the classifier to load from huggingface in case `path` does not exist + (https://huggingface.co/dedoc/line_type_classifiers) + :param path: path from where to load the pickled classifier + :return: loaded XGBClassifier and parameters for a feature extractor + """ if not os.path.isfile(path): out_dir, out_name = os.path.split(path) download_from_hub(out_dir=out_dir, out_name=out_name, repo_name="line_type_classifiers", hub_name=f"{classifier_type}.pkl.gz") @@ -34,12 +44,19 @@ def load(self, classifier_type: str, path: str) -> Tuple[XGBClassifier, dict]: return classifier, feature_extractor_parameters - def save(self, path_out: str, parameters: object) -> str: + def save(self, path_out: str, object_for_saving: object) -> str: + """ + Save the pickled classifier (with initialization parameters for a feature extractor) into the `.pkl.gz` file with path=`path_out` + + :param path_out: path (with file name) where to save the object + :param object_for_saving: classifier with feature extractor's parameters to save + :return: the resulting path of the saved file + """ if path_out.endswith(".pkl"): path_out += ".gz" elif not path_out.endswith(".gz"): path_out += ".pkl.gz" with gzip.open(path_out, "wb") as file_out: - pickle.dump(obj=parameters, file=file_out) + pickle.dump(obj=object_for_saving, file=file_out) return path_out diff --git a/dedoc/train_dataset/data_path_config.py b/dedoc/train_dataset/data_path_config.py deleted file mode 100644 index 2cc68f9b..00000000 --- a/dedoc/train_dataset/data_path_config.py +++ /dev/null @@ -1 +0,0 @@ -table_path = "/tmp/tables" diff --git a/dedoc/train_dataset/train_dataset_utils.py b/dedoc/train_dataset/train_dataset_utils.py deleted file mode 100644 index aec0a9d5..00000000 --- a/dedoc/train_dataset/train_dataset_utils.py +++ /dev/null @@ -1,79 +0,0 @@ -import json -import os -import shutil -import zipfile -from typing import List - -import PIL -import numpy as np -from PIL.Image import Image - - 
-def __to_pil(image: np.ndarray) -> Image: - return PIL.Image.fromarray(image) - - -def __create_images_path(config: dict) -> None: - assert config.get("labeling_mode") - assert config.get("intermediate_data_path") is not None - if not os.path.isdir(config.get("intermediate_data_path")): - os.makedirs(config.get("intermediate_data_path")) - - -def get_path_original_documents(config: dict) -> str: - path = os.path.join(config["intermediate_data_path"], "original_documents") - os.makedirs(path, exist_ok=True) - return path - - -def _get_images_path(config: dict, document_name: str) -> str: - return os.path.join(get_path_original_documents(config), document_name.split(".")[0]) - - -def save_page_with_bbox(page: "PageWithBBox", document_name: str, *, config: dict) -> None: # noqa - __create_images_path(config) - uid = document_name - images_path = _get_images_path(config=config, document_name=document_name) - if not os.path.isdir(images_path): - os.makedirs(images_path) - - with open(os.path.join(config["intermediate_data_path"], "bboxes.jsonlines"), "a") as out: - image = __to_pil(page.image) - image_name = f"img_{uid}_{page.page_num:06d}.png" - image.save(os.path.join(images_path, image_name)) - for bbox in page.bboxes: - bbox_dict = bbox.to_dict() - bbox_dict["original_image"] = image_name - out.write(json.dumps(bbox_dict, ensure_ascii=False)) - out.write("\n") - - -def _convert2zip(config: dict, document_name: str) -> str: - images_path = _get_images_path(config=config, document_name=document_name) - images = [os.path.join(images_path, file) for file in sorted(os.listdir(images_path))] - - archive_filename = images_path + ".zip" - with zipfile.ZipFile(archive_filename, "w") as archive: - for image in images: - archive.write(filename=image, arcname=os.path.basename(image)) - shutil.rmtree(images_path) - return archive_filename - - -def save_line_with_meta(lines: List["LineWithMeta"], original_document: str, *, config: dict) -> None: # noqa - - 
__create_images_path(config) - if original_document.endswith((".jpg", ".png", ".pdf")): - original_document = _convert2zip(config=config, document_name=original_document) - - with open(os.path.join(config["intermediate_data_path"], "lines.jsonlines"), "a") as out: - for line in lines: - # setattr(line, "uid", line.metadata.bbox_uid) - line_dict = json.loads(json.dumps(line, default=lambda o: o.__dict__, ensure_ascii=False)) - line_dict["original_document"] = os.path.basename(original_document) - out.write(json.dumps(line_dict, ensure_ascii=False)) - out.write("\n") - - -def get_original_document_path(path2documents: str, page: List[dict]) -> str: - return os.path.join(path2documents, page[0]["original_document"]) diff --git a/dedoc/train_dataset/trainer/xgboost_line_classifier_trainer.py b/dedoc/train_dataset/trainer/xgboost_line_classifier_trainer.py deleted file mode 100644 index 4ba65d22..00000000 --- a/dedoc/train_dataset/trainer/xgboost_line_classifier_trainer.py +++ /dev/null @@ -1,38 +0,0 @@ -import logging -from typing import Callable, List, Optional - -import xgbfir -from xgboost import XGBClassifier - -from dedoc.structure_extractors.feature_extractors.abstract_extractor import AbstractFeatureExtractor -from dedoc.train_dataset.data_structures.line_with_label import LineWithLabel -from dedoc.train_dataset.trainer.base_sklearn_line_classifier import BaseSklearnLineClassifierTrainer - - -class XGBoostLineClassifierTrainer(BaseSklearnLineClassifierTrainer): - - def __init__(self, - data_url: str, - logger: logging.Logger, - feature_extractor: AbstractFeatureExtractor, - path_out: str, - path_scores: Optional[str] = None, - path_features_importances: Optional[str] = None, - tmp_dir: Optional[str] = None, - train_size: float = 0.75, - classifier_parameters: dict = None, - label_transformer: Callable[[str], str] = None, - random_seed: int = 42, - get_sample_weight: Callable[[LineWithLabel], float] = None, - n_splits: int = 10, - *, config: dict) -> None: - - 
super().__init__(data_url, logger, feature_extractor, path_out, path_scores, path_features_importances, tmp_dir, - train_size, classifier_parameters, label_transformer, random_seed, get_sample_weight, n_splits, - config=config) - - def _get_classifier(self) -> XGBClassifier: - return XGBClassifier(random_state=self.random_seed, **self.classifier_parameters) - - def _save_features_importances(self, cls: XGBClassifier, feature_names: List[str]) -> None: - xgbfir.saveXgbFI(cls, feature_names=feature_names, OutputXlsxFile=self.path_features_importances) diff --git a/dedoc/utils/train_dataset_utils.py b/dedoc/utils/train_dataset_utils.py new file mode 100644 index 00000000..fc29e8f4 --- /dev/null +++ b/dedoc/utils/train_dataset_utils.py @@ -0,0 +1,68 @@ +import json +import os +from typing import List + +import PIL +import numpy as np +from PIL.Image import Image + + +def __to_pil(image: np.ndarray) -> Image: + return PIL.Image.fromarray(image) + + +def __create_images_path(config: dict) -> None: + assert config.get("labeling_mode") + assert config.get("intermediate_data_path") is not None + if not os.path.isdir(config.get("intermediate_data_path")): + os.makedirs(config.get("intermediate_data_path")) + + +def get_path_original_documents(config: dict) -> str: + path = os.path.join(config["intermediate_data_path"], "original_documents") + os.makedirs(path, exist_ok=True) + return path + + +def _get_images_path(config: dict, document_name: str) -> str: + images_path = os.path.join(get_path_original_documents(config), document_name.split(".")[0]) + os.makedirs(images_path, exist_ok=True) + return images_path + + +def save_line_with_meta(lines: List["LineWithMeta"], original_document: str, *, config: dict) -> None: # noqa + __create_images_path(config) + + # merge lines with the same bbox + lines = __postprocess_lines(lines) + + with open(os.path.join(config["intermediate_data_path"], "lines.jsonlines"), "a") as out: + for line in lines: + # setattr(line, "uid", 
line.metadata.bbox_uid) + line_dict = json.loads(json.dumps(line, default=lambda o: o.__dict__, ensure_ascii=False)) + line_dict["original_document"] = os.path.basename(original_document) + out.write(json.dumps(line_dict, ensure_ascii=False)) + out.write("\n") + + +def __postprocess_lines(lines: List["LineWithMeta"]) -> List["LineWithMeta"]: # noqa + postprocessed_lines = [] + prev_bbox = None + for line in lines: + bbox_annotations = [annotation for annotation in line.annotations if annotation.name == "bounding box"] + if not bbox_annotations: + postprocessed_lines.append(line) + continue + + bbox = bbox_annotations[0].value + if not prev_bbox or prev_bbox != bbox: + postprocessed_lines.append(line) + prev_bbox = bbox + continue + postprocessed_lines[-1] += line + + return postprocessed_lines + + +def get_original_document_path(path2documents: str, page: List[dict]) -> str: + return os.path.join(path2documents, page[0]["original_document"]) diff --git a/docker-compose.yml b/docker-compose.yml index 7c7be0d0..904c36d8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -18,8 +18,8 @@ services: depends_on: - dedoc build: - context: . - dockerfile: Dockerfile + context: . 
+ dockerfile: Dockerfile tty: true environment: DOC_READER_HOST: "dedoc" diff --git a/docker_gpu/Dockerfile b/docker_gpu/Dockerfile index 95fb012a..e3fa713b 100644 --- a/docker_gpu/Dockerfile +++ b/docker_gpu/Dockerfile @@ -21,4 +21,4 @@ RUN echo "__version__ = \"$(cat /dedoc_root/VERSION)\"" > /dedoc_root/dedoc/vers ADD tests /dedoc_root/tests ADD resources /dedoc_root/resources -CMD ["python3", "/dedoc_root/dedoc/main.py", "-c", "/dedoc_root/dedoc/config.py"] \ No newline at end of file +CMD ["python3", "/dedoc_root/dedoc/main.py"] \ No newline at end of file diff --git a/docker_gpu/docker-compose.yml b/docker_gpu/docker-compose.yml index 2434b5f0..a29288e5 100644 --- a/docker_gpu/docker-compose.yml +++ b/docker_gpu/docker-compose.yml @@ -19,8 +19,8 @@ services: depends_on: - dedoc build: - context: .. - dockerfile: docker_gpu/Dockerfile + context: .. + dockerfile: docker_gpu/Dockerfile tty: true environment: DOC_READER_HOST: "dedoc" diff --git a/docs/source/_static/add_new_structure_type/archive.zip b/docs/source/_static/add_new_structure_type/archive.zip new file mode 100644 index 00000000..c78ea5c1 Binary files /dev/null and b/docs/source/_static/add_new_structure_type/archive.zip differ diff --git a/docs/source/_static/add_new_structure_type/article_classifier_000000_UX6.json b/docs/source/_static/add_new_structure_type/article_classifier_000000_UX6.json new file mode 100644 index 00000000..c7e3da40 --- /dev/null +++ b/docs/source/_static/add_new_structure_type/article_classifier_000000_UX6.json @@ -0,0 +1,54825 @@ +{ + "0": { + "id": 0, + "task_path": "images/000001_000000_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10005.jpg", + "labeled": [ + "title" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 78, + "y_top_left": 61, + "width": 267, + "height": 8 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10005, + "_line": "A State-of-Art Review on Automatic Video\n", + "_metadata": { + "tag_hierarchy_level": { + 
"level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10005, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 41, + "name": "indentation", + "value": "78", + "is_mergeable": true + }, + { + "start": 0, + "end": 41, + "name": "spacing", + "value": "0", + "is_mergeable": true + }, + { + "start": 0, + "end": 1, + "name": "size", + "value": "14", + "is_mergeable": true + }, + { + "start": 0, + "end": 1, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 2, + "end": 14, + "name": "size", + "value": "14", + "is_mergeable": true + }, + { + "start": 2, + "end": 14, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 15, + "end": 21, + "name": "size", + "value": "14", + "is_mergeable": true + }, + { + "start": 15, + "end": 21, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": "size", + "value": "14", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 25, + "end": 34, + "name": "size", + "value": "14", + "is_mergeable": true + }, + { + "start": 25, + "end": 34, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 35, + "end": 40, + "name": "size", + "value": "14", + "is_mergeable": true + }, + { + "start": 35, + "end": 40, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 0, + "end": 41, + "name": "bounding box", + "value": "{\"x_top_left\": 0.1776765375854214, \"y_top_left\": 0.0915915915915916, \"width\": 0.6082004555808656, \"height\": 0.012012012012012012, \"page_width\": 439, 
\"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10005", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10005

text A State-of-Art Review on Automatic Video\n

", + "default_label": "raw_text" + }, + "1": { + "id": 1, + "task_path": "images/000001_000001_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10007.jpg", + "labeled": [ + "title" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 140, + "y_top_left": 79, + "width": 143, + "height": 8 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10007, + "_line": "Annotation Techniques\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10007, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 22, + "name": "indentation", + "value": "140", + "is_mergeable": true + }, + { + "start": 0, + "end": 22, + "name": "spacing", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "size", + "value": "14", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 11, + "end": 21, + "name": "size", + "value": "14", + "is_mergeable": true + }, + { + "start": 11, + "end": 21, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 0, + "end": 22, + "name": "bounding box", + "value": "{\"x_top_left\": 0.31890660592255127, \"y_top_left\": 0.11861861861861862, \"width\": 0.32574031890660593, \"height\": 0.012012012012012012, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10007", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10007

text Annotation Techniques\n

", + "default_label": "raw_text" + }, + "2": { + "id": 2, + "task_path": "images/000001_000002_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10012.jpg", + "labeled": [ + "author" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 143, + "y_top_left": 116, + "width": 138, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10012, + "_line": "Krunal (&) Randive and R. Mohan\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10012, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 32, + "name": "indentation", + "value": "143", + "is_mergeable": true + }, + { + "start": 0, + "end": 32, + "name": "spacing", + "value": "29", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 10, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 7, + "end": 10, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 11, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 11, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 22, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 22, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 23, + "end": 25, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + 
"is_mergeable": true + }, + { + "start": 26, + "end": 31, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 31, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 32, + "name": "bounding box", + "value": "{\"x_top_left\": 0.32574031890660593, \"y_top_left\": 0.17417417417417416, \"width\": 0.3143507972665148, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + }, + { + "start": 0, + "end": 32, + "name": "attachment", + "value": "attach_0cab9eae-d429-426f-b5a4-9275587bfd0a", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10012", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10012

text Krunal (&) Randive and R. Mohan\n

", + "default_label": "raw_text" + }, + "3": { + "id": 3, + "task_path": "images/000001_000003_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10018.jpg", + "labeled": [ + "affiliation" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 119, + "y_top_left": 137, + "width": 185, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10018, + "_line": "Department of Computer Science and Engineering,\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10018, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 48, + "name": "indentation", + "value": "119", + "is_mergeable": true + }, + { + "start": 0, + "end": 48, + "name": "spacing", + "value": "14", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 11, + "end": 13, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 11, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 22, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 14, + "end": 22, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 30, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 23, + "end": 30, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 34, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 31, + "end": 34, + "name": "style", + "value": 
"OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 47, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 35, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 48, + "name": "bounding box", + "value": "{\"x_top_left\": 0.27107061503416857, \"y_top_left\": 0.2057057057057057, \"width\": 0.4214123006833713, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10018", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10018

text Department of Computer Science and Engineering,\n

", + "default_label": "raw_text" + }, + "4": { + "id": 4, + "task_path": "images/000001_000004_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10027.jpg", + "labeled": [ + "affiliation" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 73, + "y_top_left": 148, + "width": 278, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10027, + "_line": "National Institute of Technology, Tiruchirappalli 620015, Tamil Nadu, India\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10027, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 76, + "name": "indentation", + "value": "73", + "is_mergeable": true + }, + { + "start": 0, + "end": 76, + "name": "spacing", + "value": "5", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 18, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 9, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 21, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 19, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 33, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 22, + "end": 33, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 49, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 34, + "end": 49, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 57, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 50, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 63, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 58, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 64, + "end": 69, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 64, + "end": 69, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 70, + "end": 75, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 70, + "end": 75, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 76, + "name": "bounding box", + "value": "{\"x_top_left\": 0.1662870159453303, \"y_top_left\": 0.2222222222222222, \"width\": 0.6332574031890661, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10027", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10027

text National Institute of Technology, Tiruchirappalli 620015, Tamil Nadu, India\n

", + "default_label": "raw_text" + }, + "5": { + "id": 5, + "task_path": "images/000001_000005_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10029.jpg", + "labeled": [ + "affiliation" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 105, + "y_top_left": 160, + "width": 214, + "height": 3 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10029, + "_line": "krunalrandive@gmail.com, rmohan@nitt.edu\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10029, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 41, + "name": "indentation", + "value": "105", + "is_mergeable": true + }, + { + "start": 0, + "end": 41, + "name": "spacing", + "value": "7", + "is_mergeable": true + }, + { + "start": 0, + "end": 24, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 0, + "end": 24, + "name": "style", + "value": "OAMFJL+AdvTYP-MT", + "is_mergeable": true + }, + { + "start": 25, + "end": 40, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 25, + "end": 40, + "name": "style", + "value": "OAMFJL+AdvTYP-MT", + "is_mergeable": true + }, + { + "start": 0, + "end": 41, + "name": "bounding box", + "value": "{\"x_top_left\": 0.23917995444191345, \"y_top_left\": 0.24024024024024024, \"width\": 0.4874715261958998, \"height\": 0.0045045045045045045, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10029", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10029

text krunalrandive@gmail.com, rmohan@nitt.edu\n

", + "default_label": "raw_text" + }, + "6": { + "id": 6, + "task_path": "images/000001_000006_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10040.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 69, + "y_top_left": 192, + "width": 286, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10040, + "_line": "Abstract. Video annotation has gained attention because of the rapid devel-\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10040, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 76, + "name": "indentation", + "value": "69", + "is_mergeable": true + }, + { + "start": 0, + "end": 76, + "name": "spacing", + "value": "28", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 15, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 10, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 26, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 16, + "end": 26, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 27, + "end": 30, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 27, + "end": 30, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 37, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 31, + "end": 37, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 47, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 38, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 55, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 48, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 58, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 56, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 62, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 59, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 68, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 63, + "end": 68, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 69, + "end": 75, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 69, + "end": 75, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 76, + "name": "bounding box", + "value": "{\"x_top_left\": 0.1571753986332574, \"y_top_left\": 0.2882882882882883, \"width\": 0.6514806378132119, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10040", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10040

text Abstract. Video annotation has gained attention because of the rapid devel-\n

", + "default_label": "raw_text" + }, + "7": { + "id": 7, + "task_path": "images/000001_000007_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10053.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 69, + "y_top_left": 203, + "width": 286, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10053, + "_line": "opment of video information and wide usage of video analysis in all directions.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10053, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 80, + "name": "indentation", + "value": "69", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "spacing", + "value": "5", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 9, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 7, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 15, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 10, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 27, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 16, + "end": 27, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 31, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 28, + "end": 31, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 36, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 32, + "end": 36, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 42, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 37, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 45, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 43, + "end": 45, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 51, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 46, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 60, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 52, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 63, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 61, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 64, + "end": 67, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 64, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 79, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 68, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "bounding box", + "value": "{\"x_top_left\": 0.1571753986332574, \"y_top_left\": 0.3048048048048048, \"width\": 0.6514806378132119, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10053", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10053

text opment of video information and wide usage of video analysis in all directions.\n

", + "default_label": "raw_text" + }, + "8": { + "id": 8, + "task_path": "images/000001_000008_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10066.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 69, + "y_top_left": 214, + "width": 286, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10066, + "_line": "With the capacity of depicting video at the semantic level, video annotation has\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10066, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 81, + "name": "indentation", + "value": "69", + "is_mergeable": true + }, + { + "start": 0, + "end": 81, + "name": "spacing", + "value": "5", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 5, + "end": 8, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 5, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 17, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 9, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 20, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 18, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 30, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 21, + "end": 30, + "name": "style", 
+ "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 36, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 31, + "end": 36, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 39, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 37, + "end": 39, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 40, + "end": 43, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 40, + "end": 43, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 44, + "end": 52, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 44, + "end": 52, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 53, + "end": 59, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 53, + "end": 59, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 60, + "end": 65, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 60, + "end": 65, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 66, + "end": 76, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 66, + "end": 76, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 77, + "end": 80, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 77, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 81, + "name": "bounding box", + "value": "{\"x_top_left\": 0.1571753986332574, \"y_top_left\": 0.3213213213213213, \"width\": 0.6514806378132119, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": 
false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10066", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10066

text With the capacity of depicting video at the semantic level, video annotation has\n

", + "default_label": "raw_text" + }, + "9": { + "id": 9, + "task_path": "images/000001_000009_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10077.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 69, + "y_top_left": 225, + "width": 286, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10077, + "_line": "numerous applications in video analysis. Due to the shortcomings present in\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10077, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 76, + "name": "indentation", + "value": "69", + "is_mergeable": true + }, + { + "start": 0, + "end": 76, + "name": "spacing", + "value": "5", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 21, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 9, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 30, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 25, + "end": 30, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 40, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 31, + "end": 40, + "name": "style", 
+ "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 44, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 41, + "end": 44, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 47, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 45, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 51, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 48, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 64, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 52, + "end": 64, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 65, + "end": 72, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 65, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 75, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 73, + "end": 75, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 76, + "name": "bounding box", + "value": "{\"x_top_left\": 0.1571753986332574, \"y_top_left\": 0.33783783783783783, \"width\": 0.6514806378132119, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10077", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10077

text numerous applications in video analysis. Due to the shortcomings present in\n

", + "default_label": "raw_text" + }, + "10": { + "id": 10, + "task_path": "images/000001_000010_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10087.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 69, + "y_top_left": 236, + "width": 286, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10087, + "_line": "manual video annotation, Automatic Video Annotation was introduced. In this\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10087, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 76, + "name": "indentation", + "value": "69", + "is_mergeable": true + }, + { + "start": 0, + "end": 76, + "name": "spacing", + "value": "5", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 12, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 7, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 24, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 13, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 34, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 25, + "end": 34, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 40, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 35, + "end": 40, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 51, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 41, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 55, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 52, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 67, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 56, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 70, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 68, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 75, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 71, + "end": 75, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 76, + "name": "bounding box", + "value": "{\"x_top_left\": 0.1571753986332574, \"y_top_left\": 0.35435435435435436, \"width\": 0.6514806378132119, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10087", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10087

text manual video annotation, Automatic Video Annotation was introduced. In this\n

", + "default_label": "raw_text" + }, + "11": { + "id": 11, + "task_path": "images/000001_000011_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10096.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 69, + "y_top_left": 247, + "width": 286, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10096, + "_line": "paper, distinctive methodologies of automatic video annotation are discussed.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10096, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 78, + "name": "indentation", + "value": "69", + "is_mergeable": true + }, + { + "start": 0, + "end": 78, + "name": "spacing", + "value": "5", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 18, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 7, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 32, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 19, + "end": 32, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 35, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 33, + "end": 35, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 45, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 36, + "end": 45, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 51, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 46, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 62, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 52, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 66, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 63, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 77, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 67, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 78, + "name": "bounding box", + "value": "{\"x_top_left\": 0.1571753986332574, \"y_top_left\": 0.3708708708708709, \"width\": 0.6514806378132119, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10096", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10096

text paper, distinctive methodologies of automatic video annotation are discussed.\n

", + "default_label": "raw_text" + }, + "12": { + "id": 12, + "task_path": "images/000001_000012_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10107.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 69, + "y_top_left": 258, + "width": 286, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10107, + "_line": "These models are classified into five classes namely, (1) Generative models,\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10107, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 75, + "name": "indentation", + "value": "69", + "is_mergeable": true + }, + { + "start": 0, + "end": 75, + "name": "spacing", + "value": "5", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 12, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 6, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 16, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 13, + "end": 16, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 17, + "end": 26, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 17, + "end": 26, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 27, + "end": 31, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 27, + "end": 31, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 35, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 32, + "end": 35, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 43, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 36, + "end": 43, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 44, + "end": 51, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 44, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 55, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 52, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 66, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 56, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 74, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 67, + "end": 74, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 75, + "name": "bounding box", + "value": "{\"x_top_left\": 0.1571753986332574, \"y_top_left\": 0.38738738738738737, \"width\": 0.6514806378132119, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10107", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10107

text These models are classified into five classes namely, (1) Generative models,\n

", + "default_label": "raw_text" + }, + "13": { + "id": 13, + "task_path": "images/000001_000013_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10116.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 69, + "y_top_left": 269, + "width": 286, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10116, + "_line": "(2) Distance-based similarity model, (3) Discriminative model, (4) Ontology-\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10116, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 77, + "name": "indentation", + "value": "69", + "is_mergeable": true + }, + { + "start": 0, + "end": 77, + "name": "spacing", + "value": "5", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 4, + "end": 18, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 4, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 29, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 19, + "end": 29, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 30, + "end": 36, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 30, + "end": 36, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 40, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 37, + "end": 40, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 55, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 41, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 62, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 56, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 66, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 63, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 76, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 67, + "end": 76, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 77, + "name": "bounding box", + "value": "{\"x_top_left\": 0.1571753986332574, \"y_top_left\": 0.4039039039039039, \"width\": 0.6514806378132119, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10116", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10116

text (2) Distance-based similarity model, (3) Discriminative model, (4) Ontology-\n

", + "default_label": "raw_text" + }, + "14": { + "id": 14, + "task_path": "images/000001_000014_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10126.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 69, + "y_top_left": 280, + "width": 286, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10126, + "_line": "based models, (5) Deep Learning-based models. The key theoretical contribu-\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10126, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 76, + "name": "indentation", + "value": "69", + "is_mergeable": true + }, + { + "start": 0, + "end": 76, + "name": "spacing", + "value": "5", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 13, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 6, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 17, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 14, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 22, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 18, + "end": 22, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 37, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 23, + "end": 37, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 45, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 38, + "end": 45, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 49, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 46, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 53, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 50, + "end": 53, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 54, + "end": 65, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 54, + "end": 65, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 66, + "end": 75, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 66, + "end": 75, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 76, + "name": "bounding box", + "value": "{\"x_top_left\": 0.1571753986332574, \"y_top_left\": 0.42042042042042044, \"width\": 0.6514806378132119, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10126", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10126

text based models, (5) Deep Learning-based models. The key theoretical contribu-\n

", + "default_label": "raw_text" + }, + "15": { + "id": 15, + "task_path": "images/000001_000015_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10139.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 69, + "y_top_left": 291, + "width": 286, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10139, + "_line": "tions in the current decade in support of video annotation strategies are dis-\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10139, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 79, + "name": "indentation", + "value": "69", + "is_mergeable": true + }, + { + "start": 0, + "end": 79, + "name": "spacing", + "value": "5", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 8, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 6, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 12, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 9, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 20, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 13, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 27, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 21, + "end": 27, + "name": "style", 
+ "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 30, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 28, + "end": 30, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 38, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 31, + "end": 38, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 39, + "end": 41, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 39, + "end": 41, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 42, + "end": 47, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 42, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 58, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 48, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 69, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 59, + "end": 69, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 70, + "end": 73, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 70, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 74, + "end": 78, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 74, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 79, + "name": "bounding box", + "value": "{\"x_top_left\": 0.1571753986332574, \"y_top_left\": 0.4369369369369369, \"width\": 0.6514806378132119, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": 
false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10139", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10139

text tions in the current decade in support of video annotation strategies are dis-\n

", + "default_label": "raw_text" + }, + "16": { + "id": 16, + "task_path": "images/000001_000016_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10149.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 69, + "y_top_left": 302, + "width": 286, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10149, + "_line": "cussed. Additionally, the future directions concerning the research aspect of\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10149, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 78, + "name": "indentation", + "value": "69", + "is_mergeable": true + }, + { + "start": 0, + "end": 78, + "name": "spacing", + "value": "5", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 21, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 8, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 25, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 22, + "end": 25, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 32, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 26, + "end": 32, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 43, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 33, + "end": 43, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 44, + "end": 54, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 44, + "end": 54, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 55, + "end": 58, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 55, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 67, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 59, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 74, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 68, + "end": 74, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 75, + "end": 77, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 75, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 78, + "name": "bounding box", + "value": "{\"x_top_left\": 0.1571753986332574, \"y_top_left\": 0.45345345345345345, \"width\": 0.6514806378132119, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10149", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10149

text cussed. Additionally, the future directions concerning the research aspect of\n

", + "default_label": "raw_text" + }, + "17": { + "id": 17, + "task_path": "images/000001_000017_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10154.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 69, + "y_top_left": 313, + "width": 149, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10154, + "_line": "video annotation strategies are discussed.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10154, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 43, + "name": "indentation", + "value": "69", + "is_mergeable": true + }, + { + "start": 0, + "end": 43, + "name": "spacing", + "value": "5", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 16, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 6, + "end": 16, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 17, + "end": 27, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 17, + "end": 27, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 31, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 28, + "end": 31, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 42, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 32, + "end": 42, + "name": "style", + "value": 
"OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 43, + "name": "bounding box", + "value": "{\"x_top_left\": 0.1571753986332574, \"y_top_left\": 0.46996996996997, \"width\": 0.33940774487471526, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10154", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10154

text video annotation strategies are discussed.\n

", + "default_label": "raw_text" + }, + "18": { + "id": 18, + "task_path": "images/000001_000018_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10165.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 69, + "y_top_left": 335, + "width": 286, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10165, + "_line": "Keywords: Automatic Video Annotation (AVA) \u0001 Deep learning \u0001 Ontology \u0001\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10165, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 72, + "name": "indentation", + "value": "69", + "is_mergeable": true + }, + { + "start": 0, + "end": 72, + "name": "spacing", + "value": "16", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "style", + "value": "OAMFJN+AdvP4C4E74", + "is_mergeable": true + }, + { + "start": 10, + "end": 19, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 10, + "end": 19, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 25, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 20, + "end": 25, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 36, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 26, + "end": 36, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 42, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 37, + "end": 42, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 44, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 43, + "end": 44, + "name": "style", + "value": "OAMFJN+AdvP4C4E74", + "is_mergeable": true + }, + { + "start": 45, + "end": 49, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 45, + "end": 49, + "name": "style", + "value": "OAMFJN+AdvP4C4E74", + "is_mergeable": true + }, + { + "start": 50, + "end": 58, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 50, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 60, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 59, + "end": 60, + "name": "style", + "value": "OAMFJN+AdvP4C4E74", + "is_mergeable": true + }, + { + "start": 61, + "end": 69, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 61, + "end": 69, + "name": "style", + "value": "OAMFJN+AdvP4C4E74", + "is_mergeable": true + }, + { + "start": 70, + "end": 71, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 70, + "end": 71, + "name": "style", + "value": "OAMFJN+AdvP4C4E74", + "is_mergeable": true + }, + { + "start": 0, + "end": 72, + "name": "bounding box", + "value": "{\"x_top_left\": 0.1571753986332574, \"y_top_left\": 0.503003003003003, \"width\": 0.6514806378132119, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10165", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10165

text Keywords: Automatic Video Annotation (AVA) \u0001 Deep learning \u0001 Ontology \u0001\n

", + "default_label": "raw_text" + }, + "19": { + "id": 19, + "task_path": "images/000001_000019_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10172.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 69, + "y_top_left": 346, + "width": 213, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10172, + "_line": "Feature extraction \u0001 Convolutional Neural Network (CNN)\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10172, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 56, + "name": "indentation", + "value": "69", + "is_mergeable": true + }, + { + "start": 0, + "end": 56, + "name": "spacing", + "value": "5", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "style", + "value": "OAMFJN+AdvP4C4E74", + "is_mergeable": true + }, + { + "start": 8, + "end": 18, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 8, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 20, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 19, + "end": 20, + "name": "style", + "value": "OAMFJN+AdvP4C4E74", + "is_mergeable": true + }, + { + "start": 21, + "end": 34, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 21, + "end": 34, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 41, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 35, + "end": 41, + "name": "style", + "value": 
"OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 42, + "end": 49, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 42, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 55, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 50, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 56, + "name": "bounding box", + "value": "{\"x_top_left\": 0.1571753986332574, \"y_top_left\": 0.5195195195195195, \"width\": 0.48519362186788156, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10172", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10172

text Feature extraction \u0001 Convolutional Neural Network (CNN)\n

", + "default_label": "raw_text" + }, + "20": { + "id": 20, + "task_path": "images/000001_000020_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10174.jpg", + "labeled": [ + "named_item" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 39, + "y_top_left": 377, + "width": 80, + "height": 6 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10174, + "_line": "1 Introduction\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10174, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 15, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 15, + "name": "spacing", + "value": "26", + "is_mergeable": true + }, + { + "start": 0, + "end": 1, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 0, + "end": 1, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 2, + "end": 14, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 2, + "end": 14, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 0, + "end": 15, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.566066066066066, \"width\": 0.18223234624145787, \"height\": 0.009009009009009009, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10174", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10174

text 1 Introduction\n

", + "default_label": "raw_text" + }, + "21": { + "id": 21, + "task_path": "images/000001_000021_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10186.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 39, + "y_top_left": 403, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10186, + "_line": "With the recent development of online video data sharing platforms, for example,\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10186, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 81, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 81, + "name": "spacing", + "value": "18", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 5, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 5, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 15, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 16, + "end": 27, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 28, + "end": 30, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 37, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 43, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 43, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 44, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 44, + "end": 48, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 56, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 56, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 57, + "end": 67, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 57, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 68, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 81, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.6051051051051051, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10186", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10186

text With the recent development of online video data sharing platforms, for example,\n

", + "default_label": "raw_text" + }, + "22": { + "id": 22, + "task_path": "images/000001_000022_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10200.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 39, + "y_top_left": 415, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10200, + "_line": "YouTube, the rate of growth and collection of video data have developed quickly and\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10200, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 84, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 17, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 18, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 27, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 31, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 28, + "end": 31, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 32, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 45, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 45, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 46, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 56, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 56, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 57, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 57, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 80, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 80, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "bounding 
box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.6231231231231231, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10200", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10200

text YouTube, the rate of growth and collection of video data have developed quickly and\n

", + "default_label": "raw_text" + }, + "23": { + "id": 23, + "task_path": "images/000001_000023_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10213.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 39, + "y_top_left": 426, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10213, + "_line": "possess a major impact in day-to-day life. By the conventional video annotation sys-\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10213, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 85, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 8, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 15, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 22, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 16, + "end": 22, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 23, + "end": 25, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 36, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 45, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 45, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 46, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 68, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 68, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 69, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 69, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 80, + "end": 84, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 80, + "end": 84, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.6396396396396397, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10213", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10213

text possess a major impact in day-to-day life. By the conventional video annotation sys-\n

", + "default_label": "raw_text" + }, + "24": { + "id": 24, + "task_path": "images/000001_000024_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10227.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 39, + "y_top_left": 438, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10227, + "_line": "tems marking video captions or comments at the semantic level, are not feasible with\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10227, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 85, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 5, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 5, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 27, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 28, + "end": 30, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 39, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 39, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 40, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 40, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 71, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 80, + "end": 84, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 80, + "end": 84, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": 
"bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.6576576576576577, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10227", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10227

text tems marking video captions or comments at the semantic level, are not feasible with\n

", + "default_label": "raw_text" + }, + "25": { + "id": 25, + "task_path": "images/000001_000025_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10238.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 39, + "y_top_left": 450, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10238, + "_line": "enormous data available. The primary disadvantage of manual video annotations is\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10238, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 81, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 81, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 28, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 36, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 52, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 52, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 53, + "end": 59, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 53, + "end": 59, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 60, + "end": 65, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 60, + "end": 65, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 66, + "end": 77, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 66, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 78, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 78, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 81, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.6756756756756757, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10238", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10238

text enormous data available. The primary disadvantage of manual video annotations is\n

", + "default_label": "raw_text" + }, + "26": { + "id": 26, + "task_path": "images/000001_000026_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10252.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 39, + "y_top_left": 462, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10252, + "_line": "intuitionistic. It is difficult to annotate on a large amount of video information manu-\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10252, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 88, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 15, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 16, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 30, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 33, 
+ "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 45, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 45, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 46, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 53, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 53, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 54, + "end": 60, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 54, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 63, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 61, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 64, + "end": 69, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 64, + "end": 69, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 70, + "end": 81, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 70, + "end": 81, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 82, + "end": 87, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 82, + "end": 87, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": 
"bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.6936936936936937, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10252", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10252

text intuitionistic. It is difficult to annotate on a large amount of video information manu-\n

", + "default_label": "raw_text" + }, + "27": { + "id": 27, + "task_path": "images/000001_000027_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10265.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 39, + "y_top_left": 474, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10265, + "_line": "ally. This leads to uncertainty over video content. Hence, different people may infer\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10265, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 86, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 10, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 6, + "end": 10, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 11, + "end": 16, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 11, + "end": 16, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 17, + "end": 19, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 17, + "end": 19, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 31, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 20, + "end": 31, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 32, + "end": 36, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 68, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 68, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 69, + "end": 75, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 69, + "end": 75, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 76, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 76, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 80, + "end": 85, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 80, + "end": 85, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.7117117117117117, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10265", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10265

text ally. This leads to uncertainty over video content. Hence, different people may infer\n

", + "default_label": "raw_text" + }, + "28": { + "id": 28, + "task_path": "images/000001_000028_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10278.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 39, + "y_top_left": 486, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10278, + "_line": "different ways of the understanding video. This happens due to the differences in\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10278, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 82, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 82, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 17, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 18, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 35, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 59, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 59, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 60, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 60, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 79, + "end": 81, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 79, + "end": 81, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 82, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.7297297297297297, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10278", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10278

text different ways of the understanding video. This happens due to the differences in\n

", + "default_label": "raw_text" + }, + "29": { + "id": 29, + "task_path": "images/000001_000029_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10284.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 39, + "y_top_left": 498, + "width": 247, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10284, + "_line": "qualification, thinking perspective and instructive foundation.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10284, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 63, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 63, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 22, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 22, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 23, + "end": 34, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 38, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 35, + "end": 38, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 39, + "end": 50, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 39, + "end": 50, + "name": "style", + 
"value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 51, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 51, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 63, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.7477477477477478, \"width\": 0.5626423690205011, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10284", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10284

text qualification, thinking perspective and instructive foundation.\n

", + "default_label": "raw_text" + }, + "30": { + "id": 30, + "task_path": "images/000001_000030_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10296.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 54, + "y_top_left": 510, + "width": 330, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10296, + "_line": "At present, a general grouping and profound survey of Automatic Video Annota-\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10296, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 78, + "name": "indentation", + "value": "54", + "is_mergeable": true + }, + { + "start": 0, + "end": 78, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 3, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 3, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 30, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 34, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 43, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 35, + "end": 43, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 44, + "end": 50, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 44, + "end": 50, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 51, + "end": 53, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 51, + "end": 53, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 54, + "end": 63, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 54, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 64, + "end": 69, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 64, + "end": 69, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 70, + "end": 77, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 70, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 78, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12300683371298406, \"y_top_left\": 0.7657657657657657, \"width\": 0.7517084282460137, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10296", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10296

text At present, a general grouping and profound survey of Automatic Video Annota-\n

", + "default_label": "raw_text" + }, + "31": { + "id": 31, + "task_path": "images/000001_000031_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10308.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 39, + "y_top_left": 522, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10308, + "_line": "tion (AVA) techniques are yet lacking. Although some reviews of Automatic Image\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10308, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 80, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 5, + "end": 10, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 5, + "end": 10, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 11, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 11, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 25, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 29, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 29, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 30, + "end": 38, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 30, + "end": 38, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 39, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 39, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 52, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 52, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 53, + "end": 60, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 53, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 63, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 61, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 64, + "end": 73, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 64, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 74, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 74, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.7837837837837838, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10308", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10308

text tion (AVA) techniques are yet lacking. Although some reviews of Automatic Image\n

", + "default_label": "raw_text" + }, + "32": { + "id": 32, + "task_path": "images/000001_000032_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10320.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 39, + "y_top_left": 534, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10320, + "_line": "Annotation strategies, the foci were set on Content-Based Image Retrieval (CBIR) [1–\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10320, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 85, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "style", + "value": "OAMFJO+AdvTT5ada87cc+20", + "is_mergeable": true + }, + { + "start": 11, + "end": 22, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 11, + "end": 22, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 26, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 23, + "end": 26, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 27, + "end": 31, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 27, + "end": 31, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 32, + "end": 36, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 40, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 40, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 43, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 41, + "end": 43, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 44, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 44, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 63, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 64, + "end": 73, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 64, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 74, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 74, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 81, + "end": 84, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 81, + "end": 84, + "name": "style", + "value": "OAMFJO+AdvTT5ada87cc+20", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.8018018018018018, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10320", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10320

text Annotation strategies, the foci were set on Content-Based Image Retrieval (CBIR) [1–\n

", + "default_label": "raw_text" + }, + "33": { + "id": 33, + "task_path": "images/000001_000033_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10335.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 39, + "y_top_left": 546, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10335, + "_line": "3, 20, 21, 23, 26], include extraction and semantic annotations [5–7, 12–18, 22, 24, 25,\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10335, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 89, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 89, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 3, + "end": 6, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 3, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 10, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 7, + "end": 10, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 11, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 11, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 19, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 19, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 20, + "end": 27, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 38, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 28, + "end": 38, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 39, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 39, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 63, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 64, + "end": 69, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 64, + "end": 69, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 70, + "end": 76, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 70, + "end": 76, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 77, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 77, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 81, + "end": 84, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 81, + "end": 84, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 85, + "end": 88, + "name": 
"size", + "value": "9", + "is_mergeable": true + }, + { + "start": 85, + "end": 88, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 89, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.8198198198198198, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10335", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10335

text 3, 20, 21, 23, 26], include extraction and semantic annotations [5–7, 12–18, 22, 24, 25,\n

", + "default_label": "raw_text" + }, + "34": { + "id": 34, + "task_path": "images/000001_000034_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10348.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 39, + "y_top_left": 558, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10348, + "_line": "27, 44], statistical methodologies [4, 8, 9], segmentation based [10, 11], and deep\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10348, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 84, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 4, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 4, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 34, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 38, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 35, + "end": 38, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 39, + "end": 41, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 39, + "end": 41, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 42, + "end": 45, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 42, + "end": 45, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 46, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 64, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 64, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 65, + "end": 69, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 65, + "end": 69, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 70, + "end": 74, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 70, + "end": 74, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 75, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 75, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 79, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 79, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.8378378378378378, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10348", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10348

text 27, 44], statistical methodologies [4, 8, 9], segmentation based [10, 11], and deep\n

", + "default_label": "raw_text" + }, + "35": { + "id": 35, + "task_path": "images/000001_000035_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10361.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 39, + "y_top_left": 570, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10361, + "_line": "learning [28–43]. AVA has variety of applications such as AVA for archival sports\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10361, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 82, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 82, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 17, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 18, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 25, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 33, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 36, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 54, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 54, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 55, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 55, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 65, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 65, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 66, + "end": 74, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 66, + "end": 74, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 75, + "end": 81, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 75, + "end": 81, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 82, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.8558558558558559, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10361", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10361

text learning [28–43]. AVA has variety of applications such as AVA for archival sports\n

", + "default_label": "raw_text" + }, + "36": { + "id": 36, + "task_path": "images/000001_000036_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10375.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 39, + "y_top_left": 582, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10375, + "_line": "videos [45], media quality assessment [46, 48], and video content analysis [47]. In this\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10375, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 89, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 89, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 7, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 26, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 26, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 27, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 27, + "end": 37, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 65, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 65, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 66, + "end": 74, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 66, + "end": 74, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 75, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 75, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 81, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 81, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 84, + "end": 88, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 84, + "end": 88, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 89, + "name": 
"bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.8738738738738738, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10375", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10375

text videos [45], media quality assessment [46, 48], and video content analysis [47]. In this\n

", + "default_label": "raw_text" + }, + "37": { + "id": 37, + "task_path": "images/000001_000037_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10383.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 39, + "y_top_left": 594, + "width": 235, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10383, + "_line": "paper, various AVA methods are classified and discussed.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10383, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 56, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 56, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 7, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 26, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 26, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 27, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 27, + "end": 30, + "name": "style", + "value": 
"OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 40, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 40, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 41, + "end": 44, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 56, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.8918918918918919, \"width\": 0.5353075170842825, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10383", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10383

text paper, various AVA methods are classified and discussed.\n

", + "default_label": "raw_text" + }, + "38": { + "id": 38, + "task_path": "images/000001_000038_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10389.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 39, + "y_top_left": 618, + "width": 141, + "height": 4 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10389, + "_line": "© Springer Nature Switzerland AG 2020\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": 5, + "level_2": 1, + "can_be_multiline": false, + "line_type": "list_item" + }, + "page_id": 0, + "line_id": 10389, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 38, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 38, + "name": "spacing", + "value": "18", + "is_mergeable": true + }, + { + "start": 0, + "end": 1, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 0, + "end": 1, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 2, + "end": 10, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 2, + "end": 10, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 11, + "end": 17, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 11, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 29, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 18, + "end": 29, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 30, + "end": 32, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 30, + "end": 32, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + 
"is_mergeable": true + }, + { + "start": 33, + "end": 37, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 33, + "end": 37, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 38, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.9279279279279279, \"width\": 0.3211845102505695, \"height\": 0.006006006006006006, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10389", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10389

text © Springer Nature Switzerland AG 2020\n

", + "default_label": "raw_text" + }, + "39": { + "id": 39, + "task_path": "images/000001_000039_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10401.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 39, + "y_top_left": 627, + "width": 249, + "height": 4 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10401, + "_line": "A. Abraham et al. (Eds.): ISDA 2018, AISC 940, pp. 1060–1069, 2020.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10401, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 68, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 68, + "name": "spacing", + "value": "5", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 3, + "end": 10, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 3, + "end": 10, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 11, + "end": 13, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 11, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 17, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 14, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 25, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 18, + "end": 25, + "name": "style", + 
"value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 30, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 26, + "end": 30, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 36, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 31, + "end": 36, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 41, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 37, + "end": 41, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 42, + "end": 46, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 42, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 50, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 47, + "end": 50, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 51, + "end": 61, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 51, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 67, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 62, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 68, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.9414414414414415, \"width\": 0.5671981776765376, \"height\": 0.006006006006006006, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10401", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10401

text A. Abraham et al. (Eds.): ISDA 2018, AISC 940, pp. 1060–1069, 2020.\n

", + "default_label": "raw_text" + }, + "40": { + "id": 40, + "task_path": "images/000001_000040_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_10402.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 0, + "bbox": { + "x_top_left": 39, + "y_top_left": 637, + "width": 157, + "height": 4 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 10402, + "_line": "https://doi.org/10.1007/978-3-030-16657-1_99\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 0, + "line_id": 10402, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 45, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 45, + "name": "spacing", + "value": "5", + "is_mergeable": true + }, + { + "start": 0, + "end": 44, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 0, + "end": 44, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 45, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.9564564564564565, \"width\": 0.357630979498861, \"height\": 0.006006006006006006, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + }, + { + "start": 0, + "end": 45, + "name": "attachment", + "value": "attach_617f60e8-1af0-491f-bc5f-88b82dbef411", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_10402", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 0

line_id 10402

text https://doi.org/10.1007/978-3-030-16657-1_99\n

", + "default_label": "raw_text" + }, + "41": { + "id": 41, + "task_path": "images/000001_000041_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_20011.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 1, + "bbox": { + "x_top_left": 68, + "y_top_left": 60, + "width": 330, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 20011, + "_line": "In recent days, AVA techniques have gained attention into a new dimension\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 1, + "line_id": 20011, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 74, + "name": "indentation", + "value": "68", + "is_mergeable": true + }, + { + "start": 0, + "end": 74, + "name": "spacing", + "value": "0", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 3, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 3, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 15, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 19, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 16, + "end": 19, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 20, + "end": 30, + "name": "style", + 
"value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 35, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 52, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 52, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 53, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 53, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 59, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 59, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 60, + "end": 63, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 60, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 64, + "end": 73, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 64, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 74, + "name": "bounding box", + "value": "{\"x_top_left\": 0.1548974943052392, \"y_top_left\": 0.09009009009009009, \"width\": 0.7517084282460137, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_20011", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 1

line_id 20011

text In recent days, AVA techniques have gained attention into a new dimension\n

", + "default_label": "raw_text" + }, + "42": { + "id": 42, + "task_path": "images/000001_000042_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_20024.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 1, + "bbox": { + "x_top_left": 53, + "y_top_left": 72, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 20024, + "_line": "because of the shortcoming existing in the traditional annotation model. In [1], method\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 1, + "line_id": 20024, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 88, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 10, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 8, + "end": 10, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 11, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 11, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 26, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 26, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 27, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 27, + "end": 35, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 38, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 38, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 39, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 39, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 54, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 54, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 55, + "end": 65, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 55, + "end": 65, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 66, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 66, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 75, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 75, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 76, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 76, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 81, + "end": 87, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 81, + "end": 87, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.10810810810810811, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_20024", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 1

line_id 20024

text because of the shortcoming existing in the traditional annotation model. In [1], method\n

", + "default_label": "raw_text" + }, + "43": { + "id": 43, + "task_path": "images/000001_000043_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_20038.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 1, + "bbox": { + "x_top_left": 53, + "y_top_left": 84, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 20038, + "_line": "driven by the multiple Bernoulli relevance models is proposed to carry on the anno-\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 1, + "line_id": 20038, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 84, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 7, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 22, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 22, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 23, + "end": 32, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 52, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 52, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 53, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 53, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 64, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 64, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 65, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 65, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 73, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 71, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 74, + "end": 77, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 74, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 78, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 78, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "bounding 
box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.12612612612612611, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_20038", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 1

line_id 20038

text driven by the multiple Bernoulli relevance models is proposed to carry on the anno-\n

", + "default_label": "raw_text" + }, + "44": { + "id": 44, + "task_path": "images/000001_000044_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_20048.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 1, + "bbox": { + "x_top_left": 53, + "y_top_left": 96, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 20048, + "_line": "tating video by unsupervised model. These accomplishments have supported the\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 1, + "line_id": 20048, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 77, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 77, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 7, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 15, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 16, + "end": 28, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 35, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 41, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 41, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 42, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 42, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 76, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 76, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 77, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.14414414414414414, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_20048", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 1

line_id 20048

text tating video by unsupervised model. These accomplishments have supported the\n

", + "default_label": "raw_text" + }, + "45": { + "id": 45, + "task_path": "images/000001_000045_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_20059.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 1, + "bbox": { + "x_top_left": 53, + "y_top_left": 108, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 20059, + "_line": "advancement of video annotation techniques. Similarly, the annotation for the input\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 1, + "line_id": 20059, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 84, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 31, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 31, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 43, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 32, + "end": 43, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 44, + "end": 54, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 44, + "end": 54, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 55, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 55, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 69, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 69, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 70, + "end": 73, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 70, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 74, + "end": 77, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 74, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 78, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 78, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.16216216216216217, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_20059", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 1

line_id 20059

text advancement of video annotation techniques. Similarly, the annotation for the input\n

", + "default_label": "raw_text" + }, + "46": { + "id": 46, + "task_path": "images/000001_000046_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_20072.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 1, + "bbox": { + "x_top_left": 53, + "y_top_left": 120, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 20072, + "_line": "video can be predicted from the annotated dataset through the unsupervised mode or\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 1, + "line_id": 20072, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 83, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 6, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 22, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 22, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 23, + "end": 27, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 31, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 28, + "end": 31, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 41, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 32, + "end": 41, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 42, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 42, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 74, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 74, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 75, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 75, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 80, + "end": 82, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 80, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.18018018018018017, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_20072", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 1

line_id 20072

text video can be predicted from the annotated dataset through the unsupervised mode or\n

", + "default_label": "raw_text" + }, + "47": { + "id": 47, + "task_path": "images/000001_000047_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_20073.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 1, + "bbox": { + "x_top_left": 53, + "y_top_left": 132, + "width": 56, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 20073, + "_line": "automatically.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 1, + "line_id": 20073, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 15, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 15, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 15, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.1981981981981982, \"width\": 0.1275626423690205, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_20073", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 1

line_id 20073

text automatically.\n

", + "default_label": "raw_text" + }, + "48": { + "id": 48, + "task_path": "images/000001_000048_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_20086.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 1, + "bbox": { + "x_top_left": 68, + "y_top_left": 144, + "width": 330, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 20086, + "_line": "The main purpose of the annotation technique is to bridge the semantic gap\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 1, + "line_id": 20086, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 75, + "name": "indentation", + "value": "68", + "is_mergeable": true + }, + { + "start": 0, + "end": 75, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 4, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 4, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 16, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 16, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 17, + "end": 19, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 17, + "end": 19, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 20, + "end": 23, + "name": "style", + 
"value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 34, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 35, + "end": 44, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 50, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 50, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 51, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 51, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 74, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 71, + "end": 74, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 75, + "name": "bounding box", + "value": "{\"x_top_left\": 0.1548974943052392, \"y_top_left\": 0.21621621621621623, \"width\": 0.7517084282460137, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": 
false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_20086", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 1

line_id 20086

text The main purpose of the annotation technique is to bridge the semantic gap\n

", + "default_label": "raw_text" + }, + "49": { + "id": 49, + "task_path": "images/000001_000049_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_20098.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 1, + "bbox": { + "x_top_left": 53, + "y_top_left": 156, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 20098, + "_line": "between low-level features in the video and high-level semantic labels. Annotation can\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 1, + "line_id": 20098, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 87, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 87, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 17, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 8, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 26, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 18, + "end": 26, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 27, + "end": 29, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 27, + "end": 29, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 30, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 30, + "end": 33, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 39, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 39, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 40, + "end": 43, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 40, + "end": 43, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 44, + "end": 54, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 44, + "end": 54, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 55, + "end": 63, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 55, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 64, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 64, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 82, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 83, + "end": 86, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 83, + "end": 86, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 87, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.23423423423423423, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_20098", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 1

line_id 20098

text between low-level features in the video and high-level semantic labels. Annotation can\n

", + "default_label": "raw_text" + }, + "50": { + "id": 50, + "task_path": "images/000001_000050_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_20109.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 1, + "bbox": { + "x_top_left": 53, + "y_top_left": 168, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 20109, + "_line": "encourage video search using the content. Content-based video retrieval (CBVR) can\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 1, + "line_id": 20109, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 83, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 15, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 22, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 16, + "end": 22, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 23, + "end": 28, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 32, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 41, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 41, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 42, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 42, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 79, + "end": 82, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 79, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.25225225225225223, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_20109", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 1

line_id 20109

text encourage video search using the content. Content-based video retrieval (CBVR) can\n

", + "default_label": "raw_text" + }, + "51": { + "id": 51, + "task_path": "images/000001_000051_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_20121.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 1, + "bbox": { + "x_top_left": 53, + "y_top_left": 180, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 20121, + "_line": "be considered semantically significant than to perform without any query because the\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 1, + "line_id": 20121, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 84, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 3, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 3, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 26, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 26, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 27, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 27, + "end": 37, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 42, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 45, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 45, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 53, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 46, + "end": 53, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 54, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 54, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 65, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 65, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 66, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 66, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 80, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 80, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.2702702702702703, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_20121", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 1

line_id 20121

text be considered semantically significant than to perform without any query because the\n

", + "default_label": "raw_text" + }, + "52": { + "id": 52, + "task_path": "images/000001_000052_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_20134.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 1, + "bbox": { + "x_top_left": 53, + "y_top_left": 192, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 20134, + "_line": "automated mapping between labels and video frames can be trusted [1–3]. The process\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 1, + "line_id": 20134, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 84, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 17, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 18, + "end": 25, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 32, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 36, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 53, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 53, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 54, + "end": 56, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 54, + "end": 56, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 57, + "end": 64, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 57, + "end": 64, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 65, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 65, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 75, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 75, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 76, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 76, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.2882882882882883, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_20134", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 1

line_id 20134

text automated mapping between labels and video frames can be trusted [1–3]. The process\n

", + "default_label": "raw_text" + }, + "53": { + "id": 53, + "task_path": "images/000001_000053_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_20143.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 1, + "bbox": { + "x_top_left": 53, + "y_top_left": 204, + "width": 215, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 20143, + "_line": "of Automatic video annotation is presented in Fig. 1.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 1, + "line_id": 20143, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 54, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 54, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 3, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 3, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 29, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 29, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 30, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 30, + "end": 32, + "name": "style", + "value": 
"OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 45, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 45, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 50, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 46, + "end": 50, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 51, + "end": 53, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 51, + "end": 53, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 54, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.3063063063063063, \"width\": 0.489749430523918, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_20143", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 1

line_id 20143

text of Automatic video annotation is presented in Fig. 1.\n

", + "default_label": "raw_text" + }, + "54": { + "id": 54, + "task_path": "images/000001_000054_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_20156.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 1, + "bbox": { + "x_top_left": 68, + "y_top_left": 461, + "width": 330, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 20156, + "_line": "In this paper, five AVA strategies are illustrated. The taxonomy for the model-\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 1, + "line_id": 20156, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 79, + "name": "indentation", + "value": "68", + "is_mergeable": true + }, + { + "start": 0, + "end": 79, + "name": "spacing", + "value": "251", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 3, + "end": 7, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 3, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 8, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 22, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 22, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 23, + "end": 33, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 37, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 50, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 50, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 51, + "end": 54, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 51, + "end": 54, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 55, + "end": 63, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 55, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 64, + "end": 67, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 64, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 68, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 79, + "name": "bounding box", + "value": "{\"x_top_left\": 0.1548974943052392, \"y_top_left\": 0.6921921921921922, \"width\": 0.7517084282460137, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_20156", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 1

line_id 20156

text In this paper, five AVA strategies are illustrated. The taxonomy for the model-\n

", + "default_label": "raw_text" + }, + "55": { + "id": 55, + "task_path": "images/000001_000055_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_20171.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 1, + "bbox": { + "x_top_left": 53, + "y_top_left": 473, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 20171, + "_line": "based techniques is shown in Fig. 2. The generative models are discussed in Sect. 2.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 1, + "line_id": 20171, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 85, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 16, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 6, + "end": 16, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 17, + "end": 19, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 17, + "end": 19, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 20, + "end": 25, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 28, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 33, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 36, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 40, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 40, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 41, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 75, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 75, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 76, + "end": 81, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 76, + "end": 81, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 82, + "end": 84, + "name": 
"size", + "value": "9", + "is_mergeable": true + }, + { + "start": 82, + "end": 84, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.7102102102102102, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_20171", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 1

line_id 20171

text based techniques is shown in Fig. 2. The generative models are discussed in Sect. 2.\n

", + "default_label": "raw_text" + }, + "56": { + "id": 56, + "task_path": "images/000001_000056_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_20182.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 1, + "bbox": { + "x_top_left": 53, + "y_top_left": 485, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 20182, + "_line": "Section 3 discuss about the distance-based similarity model, discriminative models are\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 1, + "line_id": 20182, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 87, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 87, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 8, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 17, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 18, + "end": 23, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 27, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 28, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 53, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 53, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 54, + "end": 60, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 54, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 75, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 61, + "end": 75, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 76, + "end": 82, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 76, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 83, + "end": 86, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 83, + "end": 86, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 87, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.7282282282282282, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_20182", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 1

line_id 20182

text Section 3 discuss about the distance-based similarity model, discriminative models are\n

", + "default_label": "raw_text" + }, + "57": { + "id": 57, + "task_path": "images/000001_000057_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_20196.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 1, + "bbox": { + "x_top_left": 53, + "y_top_left": 497, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 20196, + "_line": "discussed in Sect. 4. The ontology-based models are discussed in Sect. 5, and deep\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 1, + "line_id": 20196, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 83, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 25, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 40, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 40, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 41, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 64, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 64, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 65, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 65, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 73, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 71, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 74, + "end": 77, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 74, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 78, + "end": 82, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 78, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": 
"bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.7462462462462462, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_20196", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 1

line_id 20196

text discussed in Sect. 4. The ontology-based models are discussed in Sect. 5, and deep\n

", + "default_label": "raw_text" + }, + "58": { + "id": 58, + "task_path": "images/000001_000058_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_20209.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 1, + "bbox": { + "x_top_left": 53, + "y_top_left": 509, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 20209, + "_line": "learning-based models are discussed in Sect. 6. Section 7 discusses about a different\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 1, + "line_id": 20209, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 86, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 25, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 35, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 38, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 38, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 39, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 39, + "end": 44, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 67, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 73, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 68, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 74, + "end": 75, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 74, + "end": 75, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 76, + "end": 85, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 76, + "end": 85, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.7642642642642643, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_20209", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 1

line_id 20209

text learning-based models are discussed in Sect. 6. Section 7 discusses about a different\n

", + "default_label": "raw_text" + }, + "59": { + "id": 59, + "task_path": "images/000001_000059_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_20219.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 1, + "bbox": { + "x_top_left": 53, + "y_top_left": 521, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 20219, + "_line": "framework, and challenges in annotation models. The previously mentioned five\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 1, + "line_id": 20219, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 77, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 77, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 11, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 11, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 25, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 28, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 39, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 39, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 40, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 40, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 76, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 76, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 77, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.7822822822822822, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_20219", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 1

line_id 20219

text framework, and challenges in annotation models. The previously mentioned five\n

", + "default_label": "raw_text" + }, + "60": { + "id": 60, + "task_path": "images/000001_000060_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_20232.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 1, + "bbox": { + "x_top_left": 53, + "y_top_left": 533, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 20232, + "_line": "classifications of AVA strategies can be additionally arranged into a few sub-classes as\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 1, + "line_id": 20232, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 88, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 17, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 18, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 32, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 36, 
+ "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 39, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 39, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 40, + "end": 52, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 40, + "end": 52, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 53, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 53, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 68, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 68, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 69, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 69, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 84, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 84, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 85, + "end": 87, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 85, + "end": 87, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.8003003003003003, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", 
+ "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_20232", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 1

line_id 20232

text classifications of AVA strategies can be additionally arranged into a few sub-classes as\n

", + "default_label": "raw_text" + }, + "61": { + "id": 61, + "task_path": "images/000001_000061_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_20243.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 1, + "bbox": { + "x_top_left": 53, + "y_top_left": 545, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 20243, + "_line": "indicated by their fundamental ideas. Provides a taxonomy for Automatic Video\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 1, + "line_id": 20243, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 78, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 78, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 30, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 37, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 48, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 77, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 78, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.8183183183183184, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_20243", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 1

line_id 20243

text indicated by their fundamental ideas. Provides a taxonomy for Automatic Video\n

", + "default_label": "raw_text" + }, + "62": { + "id": 62, + "task_path": "images/000001_000062_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_20245.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 1, + "bbox": { + "x_top_left": 53, + "y_top_left": 557, + "width": 92, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 20245, + "_line": "Annotation techniques.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 1, + "line_id": 20245, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 23, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 23, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 11, + "end": 22, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 11, + "end": 22, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 23, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.8363363363363363, \"width\": 0.20956719817767655, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_20245", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 1

line_id 20245

text Annotation techniques.\n

", + "default_label": "raw_text" + }, + "63": { + "id": 63, + "task_path": "images/000001_000063_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_20253.jpg", + "labeled": [ + "caption" + ], + "data": { + "location": { + "page_number": 1, + "bbox": { + "x_top_left": 130, + "y_top_left": 432, + "width": 192, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 20253, + "_line": "Fig. 1. The process of Automatic Video Annotation\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 1, + "line_id": 20253, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 50, + "name": "indentation", + "value": "130", + "is_mergeable": true + }, + { + "start": 0, + "end": 50, + "name": "spacing", + "value": "0", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 5, + "end": 7, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 5, + "end": 7, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 8, + "end": 11, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 8, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 19, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 12, + "end": 19, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 22, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 20, + "end": 22, + "name": "style", + "value": 
"OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 32, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 23, + "end": 32, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 38, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 33, + "end": 38, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 39, + "end": 49, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 39, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 50, + "name": "bounding box", + "value": "{\"x_top_left\": 0.296127562642369, \"y_top_left\": 0.6486486486486487, \"width\": 0.43735763097949887, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_20253", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 1

line_id 20253

text Fig. 1. The process of Automatic Video Annotation\n

", + "default_label": "raw_text" + }, + "64": { + "id": 64, + "task_path": "images/000001_000064_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_20262.jpg", + "labeled": [ + "other" + ], + "data": { + "location": { + "page_number": 1, + "bbox": { + "x_top_left": 120, + "y_top_left": 35, + "width": 278, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 20262, + "_line": "A State-of-Art Review on Automatic Video Annotation Techniques 1061\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 1, + "line_id": 20262, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 68, + "name": "indentation", + "value": "120", + "is_mergeable": true + }, + { + "start": 0, + "end": 68, + "name": "spacing", + "value": "0", + "is_mergeable": true + }, + { + "start": 0, + "end": 1, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 0, + "end": 1, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 2, + "end": 14, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 2, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 21, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 15, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 34, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 25, + "end": 34, + "name": "style", + 
"value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 40, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 35, + "end": 40, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 51, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 41, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 62, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 52, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 67, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 63, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 68, + "name": "bounding box", + "value": "{\"x_top_left\": 0.2733485193621868, \"y_top_left\": 0.052552552552552555, \"width\": 0.6332574031890661, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + }, + { + "start": 0, + "end": 68, + "name": "attachment", + "value": "attach_b927c3a0-cca3-49da-8a8d-a354f6620cc5", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_20262", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 1

line_id 20262

text A State-of-Art Review on Automatic Video Annotation Techniques 1061\n

", + "default_label": "raw_text" + }, + "65": { + "id": 65, + "task_path": "images/000001_000065_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_30003.jpg", + "labeled": [ + "named_item" + ], + "data": { + "location": { + "page_number": 2, + "bbox": { + "x_top_left": 39, + "y_top_left": 274, + "width": 204, + "height": 6 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 30003, + "_line": "2 Generative Model-Based Techniques\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 2, + "line_id": 30003, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 36, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 36, + "name": "spacing", + "value": "0", + "is_mergeable": true + }, + { + "start": 0, + "end": 1, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 0, + "end": 1, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 2, + "end": 12, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 2, + "end": 12, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 13, + "end": 24, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 13, + "end": 24, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 25, + "end": 35, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 25, + "end": 35, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 0, + "end": 36, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.4114114114114114, \"width\": 
0.4646924829157175, \"height\": 0.009009009009009009, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_30003", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 2

line_id 30003

text 2 Generative Model-Based Techniques\n

", + "default_label": "raw_text" + }, + "66": { + "id": 66, + "task_path": "images/000001_000066_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_30014.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 2, + "bbox": { + "x_top_left": 39, + "y_top_left": 299, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 30014, + "_line": "The generative model-based annotation techniques remain very prevalent, and good in\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 2, + "line_id": 30014, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 84, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "spacing", + "value": "18", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 4, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 4, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 26, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 26, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 27, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 27, + "end": 37, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 48, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 60, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 61, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 75, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 75, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 76, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 76, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 81, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 81, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.44894894894894893, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_30014", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 2

line_id 30014

text The generative model-based annotation techniques remain very prevalent, and good in\n

", + "default_label": "raw_text" + }, + "67": { + "id": 67, + "task_path": "images/000001_000067_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_30028.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 2, + "bbox": { + "x_top_left": 39, + "y_top_left": 311, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 30028, + "_line": "an early st 21 century. These techniques are committed to boost the probability of video\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 2, + "line_id": 30028, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 89, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 89, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 3, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 3, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 23, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 29, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 29, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 30, + "end": 40, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 30, + "end": 40, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 41, + "end": 44, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 54, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 54, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 55, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 55, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 63, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 64, + "end": 67, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 64, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 68, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 80, + "end": 82, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 80, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 83, + "end": 88, + "name": 
"size", + "value": "9", + "is_mergeable": true + }, + { + "start": 83, + "end": 88, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 89, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.466966966966967, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_30028", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 2

line_id 30028

text an early st 21 century. These techniques are committed to boost the probability of video\n

", + "default_label": "raw_text" + }, + "68": { + "id": 68, + "task_path": "images/000001_000068_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_30041.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 2, + "bbox": { + "x_top_left": 39, + "y_top_left": 323, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 30041, + "_line": "features and tags. For an unannotated video, these techniques compute a joint proba-\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 2, + "line_id": 30041, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 85, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 22, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 22, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 23, + "end": 25, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 37, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 44, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 50, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 50, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 51, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 51, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 69, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 69, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 70, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 70, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 77, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 78, + "end": 84, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 78, + "end": 84, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.484984984984985, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_30041", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 2

line_id 30041

text features and tags. For an unannotated video, these techniques compute a joint proba-\n

", + "default_label": "raw_text" + }, + "69": { + "id": 69, + "task_path": "images/000001_000069_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_30056.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 2, + "bbox": { + "x_top_left": 39, + "y_top_left": 335, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 30056, + "_line": "bilistic model of features extracted from video and labels to find the likelihood of a\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 2, + "line_id": 30056, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 86, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 17, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 26, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 18, + "end": 26, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 27, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 27, + "end": 36, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 41, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 41, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 42, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 42, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 65, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 65, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 66, + "end": 69, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 66, + "end": 69, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 70, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 70, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 81, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 81, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 84, + "end": 85, + "name": 
"size", + "value": "9", + "is_mergeable": true + }, + { + "start": 84, + "end": 85, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.503003003003003, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_30056", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 2

line_id 30056

text bilistic model of features extracted from video and labels to find the likelihood of a\n

", + "default_label": "raw_text" + }, + "70": { + "id": 70, + "task_path": "images/000001_000070_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_30069.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 2, + "bbox": { + "x_top_left": 39, + "y_top_left": 347, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 30069, + "_line": "video tag using training dataset. The generative models utilized for AVA comprise of\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 2, + "line_id": 30069, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 85, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 6, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 15, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 16, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 33, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 37, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 48, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 64, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 64, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 65, + "end": 68, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 65, + "end": 68, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 69, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 69, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 81, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 81, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 82, + "end": 84, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 82, + "end": 84, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.521021021021021, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_30069", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 2

line_id 30069

text video tag using training dataset. The generative models utilized for AVA comprise of\n

", + "default_label": "raw_text" + }, + "71": { + "id": 71, + "task_path": "images/000001_000071_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_30079.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 2, + "bbox": { + "x_top_left": 39, + "y_top_left": 359, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 30079, + "_line": "the relevance models. In probability and statistics, generative models characterize\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 2, + "line_id": 30079, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 84, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 4, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 4, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 36, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 40, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 40, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 52, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 41, + "end": 52, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 53, + "end": 63, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 53, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 64, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 64, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 71, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.539039039039039, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_30079", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 2

line_id 30079

text the relevance models. In probability and statistics, generative models characterize\n

", + "default_label": "raw_text" + }, + "72": { + "id": 72, + "task_path": "images/000001_000072_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_30087.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 2, + "bbox": { + "x_top_left": 39, + "y_top_left": 371, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 30087, + "_line": "arbitrarily generating recognizable information, regularly provides some unseen\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 2, + "line_id": 30087, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 80, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 22, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 22, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 23, + "end": 35, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 48, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 58, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 67, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 68, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.5570570570570571, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_30087", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 2

line_id 30087

text arbitrarily generating recognizable information, regularly provides some unseen\n

", + "default_label": "raw_text" + }, + "73": { + "id": 73, + "task_path": "images/000001_000073_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_30098.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 2, + "bbox": { + "x_top_left": 39, + "y_top_left": 383, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 30098, + "_line": "parameter. It indicates a joint probability distribution over labeled sequence and\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 2, + "line_id": 30098, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 83, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 11, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 11, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 23, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 25, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 31, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 31, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 43, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 32, + "end": 43, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 44, + "end": 56, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 44, + "end": 56, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 57, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 57, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 69, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 69, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 70, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 70, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 79, + "end": 82, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 79, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.575075075075075, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_30098", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 2

line_id 30098

text parameter. It indicates a joint probability distribution over labeled sequence and\n

", + "default_label": "raw_text" + }, + "74": { + "id": 74, + "task_path": "images/000001_000074_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_30108.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 2, + "bbox": { + "x_top_left": 39, + "y_top_left": 395, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 30108, + "_line": "observations. Generative models utilized machine learning for presenting data directly.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 2, + "line_id": 30108, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 88, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 31, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 31, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 40, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 32, + "end": 40, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 41, + "end": 48, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 77, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 78, + "end": 87, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 78, + "end": 87, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.5930930930930931, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_30108", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 2

line_id 30108

text observations. Generative models utilized machine learning for presenting data directly.\n

", + "default_label": "raw_text" + }, + "75": { + "id": 75, + "task_path": "images/000001_000075_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_30121.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 2, + "bbox": { + "x_top_left": 54, + "y_top_left": 406, + "width": 330, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 30121, + "_line": "For the given input annotated training set, the authors in [2] have demonstrated\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 2, + "line_id": 30121, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 81, + "name": "indentation", + "value": "54", + "is_mergeable": true + }, + { + "start": 0, + "end": 81, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 4, + "end": 7, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 4, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 8, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 19, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 19, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 29, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 20, + "end": 29, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 30, + "end": 38, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 30, + "end": 38, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 39, + "end": 43, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 39, + "end": 43, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 44, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 44, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 67, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 68, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 81, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12300683371298406, \"y_top_left\": 0.6096096096096096, \"width\": 0.7517084282460137, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_30121", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 2

line_id 30121

text For the given input annotated training set, the authors in [2] have demonstrated\n

", + "default_label": "raw_text" + }, + "76": { + "id": 76, + "task_path": "images/000001_000076_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_30136.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 2, + "bbox": { + "x_top_left": 39, + "y_top_left": 418, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 30136, + "_line": "probabilistic models. It enables us to identify the likelihood of creating a tag from the\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 2, + "line_id": 30136, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 90, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 90, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 32, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 35, 
+ "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 38, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 38, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 39, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 39, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 65, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 65, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 66, + "end": 74, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 66, + "end": 74, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 75, + "end": 76, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 75, + "end": 76, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 77, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 77, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 81, + "end": 85, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 81, + "end": 85, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 86, + "end": 89, + "name": 
"size", + "value": "9", + "is_mergeable": true + }, + { + "start": 86, + "end": 89, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 90, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.6276276276276276, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_30136", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 2

line_id 30136

text probabilistic models. It enables us to identify the likelihood of creating a tag from the\n

", + "default_label": "raw_text" + }, + "77": { + "id": 77, + "task_path": "images/000001_000077_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_30150.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 2, + "bbox": { + "x_top_left": 39, + "y_top_left": 430, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 30150, + "_line": "features in the video frames. In [3] the authors presented a video annotation technique,\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 2, + "line_id": 30150, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 89, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 89, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 15, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 16, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 29, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 29, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 30, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 30, + "end": 32, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 36, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 40, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 40, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 41, + "end": 48, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 60, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 61, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 77, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 78, + "end": 88, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 78, + "end": 88, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 89, + "name": 
"bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.6456456456456456, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_30150", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 2

line_id 30150

text features in the video frames. In [3] the authors presented a video annotation technique,\n

", + "default_label": "raw_text" + }, + "78": { + "id": 78, + "task_path": "images/000001_000078_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_30162.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 2, + "bbox": { + "x_top_left": 39, + "y_top_left": 442, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 30162, + "_line": "called as a double cross-media relevance model (DCMRM). It assesses the joint\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 2, + "line_id": 30162, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 78, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 78, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 7, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 30, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 40, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 40, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 41, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 67, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 68, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 77, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 78, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.6636636636636637, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_30162", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 2

line_id 30162

text called as a double cross-media relevance model (DCMRM). It assesses the joint\n

", + "default_label": "raw_text" + }, + "79": { + "id": 79, + "task_path": "images/000001_000079_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_30173.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 2, + "bbox": { + "x_top_left": 39, + "y_top_left": 454, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 30173, + "_line": "probability through the accumulation of tags in a pre-characterized dictionary. It\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 2, + "line_id": 30173, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 83, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 19, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 19, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 20, + "end": 23, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 36, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 39, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 39, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 40, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 40, + "end": 44, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 67, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 68, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 80, + "end": 82, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 80, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.6816816816816816, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_30173", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 2

line_id 30173

text probability through the accumulation of tags in a pre-characterized dictionary. It\n

", + "default_label": "raw_text" + }, + "80": { + "id": 80, + "task_path": "images/000001_000080_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_30184.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 2, + "bbox": { + "x_top_left": 39, + "y_top_left": 466, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 30184, + "_line": "includes two basic relations in video annotation. Namely, the word-to-video relation-\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 2, + "line_id": 30184, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 86, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 28, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 31, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 31, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 32, + "end": 37, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 75, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 75, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 76, + "end": 85, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 76, + "end": 85, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.6996996996996997, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_30184", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 2

line_id 30184

text includes two basic relations in video annotation. Namely, the word-to-video relation-\n

", + "default_label": "raw_text" + }, + "81": { + "id": 81, + "task_path": "images/000001_000081_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_30197.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 2, + "bbox": { + "x_top_left": 39, + "y_top_left": 478, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 30197, + "_line": "ship and word-to-word relationship. In [4] the authors presented a procedure to com-\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 2, + "line_id": 30197, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 85, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 5, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 5, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 35, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 38, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 38, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 39, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 39, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 54, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 54, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 55, + "end": 64, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 55, + "end": 64, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 65, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 65, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 76, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 76, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 77, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 77, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 80, + "end": 84, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 80, + "end": 84, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.7177177177177178, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_30197", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 2

line_id 30197

text ship and word-to-word relationship. In [4] the authors presented a procedure to com-\n

", + "default_label": "raw_text" + }, + "82": { + "id": 82, + "task_path": "images/000001_000082_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_30211.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 2, + "bbox": { + "x_top_left": 39, + "y_top_left": 490, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 30211, + "_line": "bine the results of multiple trackers that includes evaluating the error statistics of the\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 2, + "line_id": 30211, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 91, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 91, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 5, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 5, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 16, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 16, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 17, + "end": 19, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 17, + "end": 19, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 20, + "end": 28, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 37, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 84, + "end": 86, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 84, + "end": 86, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 87, + "end": 90, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 87, + "end": 90, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 91, + "name": 
"bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.7357357357357357, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_30211", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 2

line_id 30211

text bine the results of multiple trackers that includes evaluating the error statistics of the\n

", + "default_label": "raw_text" + }, + "83": { + "id": 83, + "task_path": "images/000001_000083_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_30222.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 2, + "bbox": { + "x_top_left": 39, + "y_top_left": 502, + "width": 314, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 30222, + "_line": "individual trackers by comparing their outcome to a reliable people identifier.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 2, + "line_id": 30222, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 79, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 79, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 11, + "end": 19, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 11, + "end": 19, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 22, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 20, + "end": 22, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 23, + "end": 32, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 38, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 38, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 39, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 39, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 60, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 67, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 61, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 68, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 79, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.7537537537537538, \"width\": 0.715261958997722, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_30222", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 2

line_id 30222

text individual trackers by comparing their outcome to a reliable people identifier.\n

", + "default_label": "raw_text" + }, + "84": { + "id": 84, + "task_path": "images/000001_000084_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_30233.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 2, + "bbox": { + "x_top_left": 54, + "y_top_left": 514, + "width": 330, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 30233, + "_line": "Outstanding contributions made by the generative models to the improvement of\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 2, + "line_id": 30233, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 78, + "name": "indentation", + "value": "54", + "is_mergeable": true + }, + { + "start": 0, + "end": 78, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 25, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 30, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 33, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 37, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 48, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 74, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 74, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 75, + "end": 77, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 75, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 78, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12300683371298406, \"y_top_left\": 0.7717717717717718, \"width\": 0.7517084282460137, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_30233", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 2

line_id 30233

text Outstanding contributions made by the generative models to the improvement of\n

", + "default_label": "raw_text" + }, + "85": { + "id": 85, + "task_path": "images/000001_000085_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_30243.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 2, + "bbox": { + "x_top_left": 39, + "y_top_left": 526, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 30243, + "_line": "annotation techniques. Nevertheless, there exist challenges in the generative models-\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 2, + "line_id": 30243, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 86, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 11, + "end": 22, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 11, + "end": 22, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 23, + "end": 36, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 48, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 59, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 59, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 60, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 60, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 77, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 78, + "end": 85, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 78, + "end": 85, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.7897897897897898, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_30243", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 2

line_id 30243

text annotation techniques. Nevertheless, there exist challenges in the generative models-\n

", + "default_label": "raw_text" + }, + "86": { + "id": 86, + "task_path": "images/000001_000086_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_30245.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 2, + "bbox": { + "x_top_left": 39, + "y_top_left": 538, + "width": 65, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 30245, + "_line": "based strategies.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 2, + "line_id": 30245, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 18, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 18, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 17, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 6, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 18, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.8078078078078078, \"width\": 0.1480637813211845, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_30245", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 2

line_id 30245

text based strategies.\n

", + "default_label": "raw_text" + }, + "87": { + "id": 87, + "task_path": "images/000001_000087_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_30255.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 2, + "bbox": { + "x_top_left": 40, + "y_top_left": 556, + "width": 252, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 30255, + "_line": "1. Generative models are weak to optimize the tag prediction.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": 2, + "level_2": 1, + "can_be_multiline": false, + "line_type": "list_item" + }, + "page_id": 2, + "line_id": 30255, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 62, + "name": "indentation", + "value": "40", + "is_mergeable": true + }, + { + "start": 0, + "end": 62, + "name": "spacing", + "value": "12", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 3, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 3, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 29, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 29, + "name": "style", + "value": 
"OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 30, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 30, + "end": 32, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 41, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 41, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 42, + "end": 45, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 42, + "end": 45, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 46, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 62, + "name": "bounding box", + "value": "{\"x_top_left\": 0.09111617312072894, \"y_top_left\": 0.8348348348348348, \"width\": 0.5740318906605922, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_30255", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 2

line_id 30255

text 1. Generative models are weak to optimize the tag prediction.\n

", + "default_label": "raw_text" + }, + "88": { + "id": 88, + "task_path": "images/000001_000088_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_30263.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 2, + "bbox": { + "x_top_left": 40, + "y_top_left": 568, + "width": 237, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 30263, + "_line": "2. Unable to recognize the complex semantic relationship.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": 2, + "level_2": 1, + "can_be_multiline": false, + "line_type": "list_item" + }, + "page_id": 2, + "line_id": 30263, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 58, + "name": "indentation", + "value": "40", + "is_mergeable": true + }, + { + "start": 0, + "end": 58, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 3, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 3, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 22, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 22, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 26, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 23, + "end": 26, + "name": "style", + "value": 
"OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 27, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 27, + "end": 34, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 43, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 35, + "end": 43, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 44, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 44, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 58, + "name": "bounding box", + "value": "{\"x_top_left\": 0.09111617312072894, \"y_top_left\": 0.8528528528528528, \"width\": 0.5398633257403189, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_30263", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 2

line_id 30263

text 2. Unable to recognize the complex semantic relationship.\n

", + "default_label": "raw_text" + }, + "89": { + "id": 89, + "task_path": "images/000001_000089_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_30270.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 2, + "bbox": { + "x_top_left": 40, + "y_top_left": 580, + "width": 238, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 30270, + "_line": "3. They require high computationally complex algorithms.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": 2, + "level_2": 1, + "can_be_multiline": false, + "line_type": "list_item" + }, + "page_id": 2, + "line_id": 30270, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 57, + "name": "indentation", + "value": "40", + "is_mergeable": true + }, + { + "start": 0, + "end": 57, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 3, + "end": 7, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 3, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 15, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 8, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 16, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 36, + "name": "style", + "value": 
"OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 44, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 56, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 56, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 57, + "name": "bounding box", + "value": "{\"x_top_left\": 0.09111617312072894, \"y_top_left\": 0.8708708708708709, \"width\": 0.5421412300683371, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_30270", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 2

line_id 30270

text 3. They require high computationally complex algorithms.\n

", + "default_label": "raw_text" + }, + "90": { + "id": 90, + "task_path": "images/000001_000090_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_30278.jpg", + "labeled": [ + "caption" + ], + "data": { + "location": { + "page_number": 2, + "bbox": { + "x_top_left": 95, + "y_top_left": 244, + "width": 233, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 30278, + "_line": "Fig. 2. Taxonomy for Automatic Video Annotation techniques.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 2, + "line_id": 30278, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 60, + "name": "indentation", + "value": "95", + "is_mergeable": true + }, + { + "start": 0, + "end": 60, + "name": "spacing", + "value": "0", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 5, + "end": 7, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 5, + "end": 7, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 8, + "end": 16, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 8, + "end": 16, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 17, + "end": 20, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 17, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 30, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 21, + "end": 30, + "name": "style", + "value": 
"OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 36, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 31, + "end": 36, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 47, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 37, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 59, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 48, + "end": 59, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 60, + "name": "bounding box", + "value": "{\"x_top_left\": 0.2164009111617312, \"y_top_left\": 0.3663663663663664, \"width\": 0.530751708428246, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_30278", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 2

line_id 30278

text Fig. 2. Taxonomy for Automatic Video Annotation techniques.\n

", + "default_label": "raw_text" + }, + "91": { + "id": 91, + "task_path": "images/000001_000091_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_30284.jpg", + "labeled": [ + "other" + ], + "data": { + "location": { + "page_number": 2, + "bbox": { + "x_top_left": 39, + "y_top_left": 35, + "width": 132, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 30284, + "_line": "1062 K. Randive and R. Mohan\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 2, + "line_id": 30284, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 29, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 29, + "name": "spacing", + "value": "0", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 5, + "end": 7, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 5, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 15, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 8, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 19, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 16, + "end": 19, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 22, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 20, + "end": 22, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": 
true + }, + { + "start": 23, + "end": 28, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 23, + "end": 28, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 29, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.052552552552552555, \"width\": 0.30068337129840544, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_30284", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 2

line_id 30284

text 1062 K. Randive and R. Mohan\n

", + "default_label": "raw_text" + }, + "92": { + "id": 92, + "task_path": "images/000001_000092_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40004.jpg", + "labeled": [ + "named_item" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 61, + "width": 281, + "height": 6 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40004, + "_line": "3 Distance-Based Similarity Model-Based Techniques\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40004, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 51, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 51, + "name": "spacing", + "value": "0", + "is_mergeable": true + }, + { + "start": 0, + "end": 1, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 0, + "end": 1, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 2, + "end": 16, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 2, + "end": 16, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 17, + "end": 27, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 17, + "end": 27, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 28, + "end": 39, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 28, + "end": 39, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 40, + "end": 50, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 40, + "end": 50, + "name": "style", + 
"value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 0, + "end": 51, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.0915915915915916, \"width\": 0.6400911161731208, \"height\": 0.009009009009009009, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40004", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40004

text 3 Distance-Based Similarity Model-Based Techniques\n

", + "default_label": "raw_text" + }, + "93": { + "id": 93, + "task_path": "images/000001_000093_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40016.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 86, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40016, + "_line": "The distance-based similarity assumes videos with similar tags for annotation. For a\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40016, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 85, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "spacing", + "value": "18", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 4, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 4, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 29, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 29, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 30, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 30, + "end": 37, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 44, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 79, + "end": 82, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 79, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 83, + "end": 84, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 83, + "end": 84, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.12912912912912913, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40016", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40016

text The distance-based similarity assumes videos with similar tags for annotation. For a\n

", + "default_label": "raw_text" + }, + "94": { + "id": 94, + "task_path": "images/000001_000094_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40032.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 98, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40032, + "_line": "sample video, these techniques enable a search for a set of same videos, and the labels\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40032, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 88, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 7, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 19, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 19, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 20, + "end": 30, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 37, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 39, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 39, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 40, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 40, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 50, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 50, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 51, + "end": 52, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 51, + "end": 52, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 53, + "end": 56, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 53, + "end": 56, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 57, + "end": 59, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 57, + "end": 59, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 60, + "end": 64, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 60, + "end": 64, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 65, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 65, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 76, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 76, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 77, + "end": 80, + "name": 
"size", + "value": "9", + "is_mergeable": true + }, + { + "start": 77, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 81, + "end": 87, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 81, + "end": 87, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.14714714714714713, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40032", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40032

text sample video, these techniques enable a search for a set of same videos, and the labels\n

", + "default_label": "raw_text" + }, + "95": { + "id": 95, + "task_path": "images/000001_000095_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40040.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 110, + "width": 180, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40040, + "_line": "of that video are depending on its similarity.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40040, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 47, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 47, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 3, + "end": 7, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 3, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 8, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 17, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 18, + "end": 27, + "name": "style", + "value": 
"OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 28, + "end": 30, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 34, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 35, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 47, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.16516516516516516, \"width\": 0.41002277904328016, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40040", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40040

text of that video are depending on its similarity.\n

", + "default_label": "raw_text" + }, + "96": { + "id": 96, + "task_path": "images/000001_000096_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40053.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 68, + "y_top_left": 122, + "width": 330, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40053, + "_line": "In [5], the authors proposed a beyond distance measurement to manage the defi-\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40053, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 78, + "name": "indentation", + "value": "68", + "is_mergeable": true + }, + { + "start": 0, + "end": 78, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 3, + "end": 7, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 3, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 8, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 19, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 19, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 20, + "end": 28, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 30, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 37, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 68, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 68, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 69, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 69, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 77, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 78, + "name": "bounding box", + "value": "{\"x_top_left\": 0.1548974943052392, \"y_top_left\": 0.1831831831831832, \"width\": 0.7517084282460137, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40053", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40053

text In [5], the authors proposed a beyond distance measurement to manage the defi-\n

", + "default_label": "raw_text" + }, + "97": { + "id": 97, + "task_path": "images/000001_000097_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40066.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 133, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40066, + "_line": "ciency of the training data in video annotation by building neighborhood similarity. In\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40066, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 88, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 7, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 22, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 22, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 23, + "end": 27, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 28, + "end": 30, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 36, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 50, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 50, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 51, + "end": 59, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 51, + "end": 59, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 60, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 60, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 84, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 84, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 85, + "end": 87, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 85, + "end": 87, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.1996996996996997, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40066", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40066

text ciency of the training data in video annotation by building neighborhood similarity. In\n

", + "default_label": "raw_text" + }, + "98": { + "id": 98, + "task_path": "images/000001_000098_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40080.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 145, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40080, + "_line": "[6], the authors have presented an application of event detection from video range from\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40080, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 88, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 5, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 5, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 16, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 16, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 17, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 17, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 31, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 31, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 32, + "end": 34, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 35, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 65, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 65, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 66, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 66, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 76, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 71, + "end": 76, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 77, + "end": 82, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 77, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 83, + "end": 87, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 83, + "end": 87, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "bounding 
box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.21771771771771772, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40080", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40080

text [6], the authors have presented an application of event detection from video range from\n

", + "default_label": "raw_text" + }, + "99": { + "id": 99, + "task_path": "images/000001_000099_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40095.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 157, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40095, + "_line": "reducing the semantic gap. The inferred metric can be utilized in a CBVR system to\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40095, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 83, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 26, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 26, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 27, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 27, + "end": 30, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 39, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 39, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 40, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 40, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 50, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 50, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 51, + "end": 53, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 51, + "end": 53, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 54, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 54, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 65, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 65, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 66, + "end": 67, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 66, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 68, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 80, + "end": 82, + "name": "size", + 
"value": "9", + "is_mergeable": true + }, + { + "start": 80, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.23573573573573572, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40095", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40095

text reducing the semantic gap. The inferred metric can be utilized in a CBVR system to\n

", + "default_label": "raw_text" + }, + "100": { + "id": 100, + "task_path": "images/000001_000100_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40106.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 169, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40106, + "_line": "extract the semantically similar video. Nearest neighbor learning takes distance metric\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40106, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 88, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 8, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 32, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 39, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 39, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 40, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 40, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 56, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 56, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 57, + "end": 65, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 57, + "end": 65, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 66, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 66, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 81, + "end": 87, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 81, + "end": 87, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.25375375375375375, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40106", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40106

text extract the semantically similar video. Nearest neighbor learning takes distance metric\n

", + "default_label": "raw_text" + }, + "101": { + "id": 101, + "task_path": "images/000001_000101_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40121.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 181, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40121, + "_line": "from the given set of labeled points and marks the relation between distance in the\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40121, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 84, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 5, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 5, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 21, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 29, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 29, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 30, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 30, + "end": 36, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 40, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 40, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 41, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 50, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 50, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 51, + "end": 59, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 51, + "end": 59, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 60, + "end": 67, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 60, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 76, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 68, + "end": 76, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 77, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 77, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 80, + "end": 83, + "name": "size", + 
"value": "9", + "is_mergeable": true + }, + { + "start": 80, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.27177177177177175, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40121", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40121

text from the given set of labeled points and marks the relation between distance in the\n

", + "default_label": "raw_text" + }, + "102": { + "id": 102, + "task_path": "images/000001_000102_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40123.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 193, + "width": 52, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40123, + "_line": "training data.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40123, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 15, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 15, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 15, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.2897897897897898, \"width\": 0.11845102505694761, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40123", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40123

text training data.\n

", + "default_label": "raw_text" + }, + "103": { + "id": 103, + "task_path": "images/000001_000103_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40139.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 68, + "y_top_left": 205, + "width": 330, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40139, + "_line": "In [7], the author not only tried to filter the semantic inferences of near- scenes for\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40139, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 87, + "name": "indentation", + "value": "68", + "is_mergeable": true + }, + { + "start": 0, + "end": 87, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 3, + "end": 7, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 3, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 8, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 22, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 22, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 23, + "end": 27, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 28, + "end": 33, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 36, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 69, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 69, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 70, + "end": 75, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 70, + "end": 75, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 76, + "end": 82, + "name": 
"size", + "value": "9", + "is_mergeable": true + }, + { + "start": 76, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 83, + "end": 86, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 83, + "end": 86, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 87, + "name": "bounding box", + "value": "{\"x_top_left\": 0.1548974943052392, \"y_top_left\": 0.3078078078078078, \"width\": 0.7517084282460137, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40139", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40139

text In [7], the author not only tried to filter the semantic inferences of near- scenes for\n

", + "default_label": "raw_text" + }, + "104": { + "id": 104, + "task_path": "images/000001_000104_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40152.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 217, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40152, + "_line": "video annotation but also to retain annotations by using a hierarchical video-to-near scene\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40152, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 92, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 92, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 16, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 6, + "end": 16, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 17, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 17, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 25, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 28, 
+ "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 35, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 50, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 50, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 51, + "end": 56, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 51, + "end": 56, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 57, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 57, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 85, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 85, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 86, + "end": 91, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 86, + "end": 91, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 92, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.3258258258258258, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", 
+ "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40152", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40152

text video annotation but also to retain annotations by using a hierarchical video-to-near scene\n

", + "default_label": "raw_text" + }, + "105": { + "id": 105, + "task_path": "images/000001_000105_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40163.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 229, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40163, + "_line": "annotation framework. A strategy for AVA given multi-label learning and multi-feature\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40163, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 86, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 11, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 11, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 23, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 32, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 36, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 40, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 40, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 41, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 67, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 68, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 85, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 85, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.34384384384384387, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40163", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40163

text annotation framework. A strategy for AVA given multi-label learning and multi-feature\n

", + "default_label": "raw_text" + }, + "106": { + "id": 106, + "task_path": "images/000001_000106_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40174.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 241, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40174, + "_line": "combination k-nearest neighbor algorithm was proposed in [8]. Amid the previous\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40174, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 80, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 30, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 40, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 40, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 41, + "end": 44, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 53, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 53, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 54, + "end": 56, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 54, + "end": 56, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 57, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 57, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 71, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.36186186186186187, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40174", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40174

text combination k-nearest neighbor algorithm was proposed in [8]. Amid the previous\n

", + "default_label": "raw_text" + }, + "107": { + "id": 107, + "task_path": "images/000001_000107_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40189.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 253, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40189, + "_line": "decade, several research efforts have been done on the best way to exploit the semantic\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40189, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 88, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 15, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 8, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 16, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 32, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 37, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 50, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 50, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 51, + "end": 54, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 51, + "end": 54, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 55, + "end": 59, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 55, + "end": 59, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 60, + "end": 63, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 60, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 64, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 64, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 74, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 74, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 75, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 75, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 79, + "end": 87, + "name": 
"size", + "value": "9", + "is_mergeable": true + }, + { + "start": 79, + "end": 87, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.37987987987987987, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40189", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40189

text decade, several research efforts have been done on the best way to exploit the semantic\n

", + "default_label": "raw_text" + }, + "108": { + "id": 108, + "task_path": "images/000001_000108_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40202.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 265, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40202, + "_line": "correlations to enhance the image and video annotation, at the same time classifies\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40202, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 83, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 15, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 16, + "end": 23, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 27, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 28, + "end": 33, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 37, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 43, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 43, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 44, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 44, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 67, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 68, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 82, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.3978978978978979, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40202", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40202

text correlations to enhance the image and video annotation, at the same time classifies\n

", + "default_label": "raw_text" + }, + "109": { + "id": 109, + "task_path": "images/000001_000109_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40213.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 277, + "width": 322, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40213, + "_line": "concepts and represents them using a Correlative Multi-Label (CML) system [9].\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40213, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 79, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 79, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 23, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 28, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 34, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 35, + "end": 36, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 48, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 60, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 61, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 73, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 74, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 74, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 79, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.4159159159159159, \"width\": 0.7334851936218679, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40213", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40213

text concepts and represents them using a Correlative Multi-Label (CML) system [9].\n

", + "default_label": "raw_text" + }, + "110": { + "id": 110, + "task_path": "images/000001_000110_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40222.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 68, + "y_top_left": 289, + "width": 330, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40222, + "_line": "The Distance based similarity model-based AVA methods are structure-intuitive.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40222, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 79, + "name": "indentation", + "value": "68", + "is_mergeable": true + }, + { + "start": 0, + "end": 79, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 4, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 4, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 29, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 29, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 30, + "end": 41, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 30, + "end": 41, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 42, + "end": 45, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 42, + "end": 45, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 53, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 46, + "end": 53, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 54, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 54, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 79, + "name": "bounding box", + "value": "{\"x_top_left\": 0.1548974943052392, \"y_top_left\": 0.4339339339339339, \"width\": 0.7517084282460137, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40222", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40222

text The Distance based similarity model-based AVA methods are structure-intuitive.\n

", + "default_label": "raw_text" + }, + "111": { + "id": 111, + "task_path": "images/000001_000111_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40236.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 301, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40236, + "_line": "Due to the high flexibility of the model, tag prediction is quite successful. However,\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40236, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 86, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 4, + "end": 6, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 4, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 10, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 7, + "end": 10, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 11, + "end": 15, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 11, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 26, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 16, + "end": 26, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 27, + "end": 29, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 27, + "end": 29, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 30, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 30, + "end": 33, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 40, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 40, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 41, + "end": 44, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 64, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 64, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 65, + "end": 76, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 65, + "end": 76, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 77, + "end": 85, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 77, + "end": 85, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": 
"bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.4519519519519519, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40236", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40236

text Due to the high flexibility of the model, tag prediction is quite successful. However,\n

", + "default_label": "raw_text" + }, + "112": { + "id": 112, + "task_path": "images/000001_000112_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40245.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 313, + "width": 285, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40245, + "_line": "because of some intrinsic shortages, more improvements are expected.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40245, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 69, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 69, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 10, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 8, + "end": 10, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 11, + "end": 15, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 11, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 16, + "end": 25, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 36, + "name": "style", + 
"value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 41, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 41, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 42, + "end": 54, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 42, + "end": 54, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 55, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 55, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 68, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 68, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 69, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.46996996996997, \"width\": 0.6492027334851936, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40245", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40245

text because of some intrinsic shortages, more improvements are expected.\n

", + "default_label": "raw_text" + }, + "113": { + "id": 113, + "task_path": "images/000001_000113_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40249.jpg", + "labeled": [ + "named_item" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 345, + "width": 223, + "height": 6 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40249, + "_line": "4 Discriminative Model-Based Techniques\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40249, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 40, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 40, + "name": "spacing", + "value": "26", + "is_mergeable": true + }, + { + "start": 0, + "end": 1, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 0, + "end": 1, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 2, + "end": 16, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 2, + "end": 16, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 17, + "end": 28, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 17, + "end": 28, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 29, + "end": 39, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 29, + "end": 39, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 0, + "end": 40, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.5180180180180181, \"width\": 
0.5079726651480638, \"height\": 0.009009009009009009, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40249", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40249

text 4 Discriminative Model-Based Techniques\n

", + "default_label": "raw_text" + }, + "114": { + "id": 114, + "task_path": "images/000001_000114_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40261.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 370, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40261, + "_line": "Video annotation is considered as the multi-label classification problem in the dis-\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40261, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 84, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "spacing", + "value": "18", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 16, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 6, + "end": 16, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 17, + "end": 19, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 17, + "end": 19, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 20, + "end": 30, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 33, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 37, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 63, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 64, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 64, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 74, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 74, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 75, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 75, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 79, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 79, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.5555555555555556, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40261", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40261

text Video annotation is considered as the multi-label classification problem in the dis-\n

", + "default_label": "raw_text" + }, + "115": { + "id": 115, + "task_path": "images/000001_000115_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40273.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 382, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40273, + "_line": "criminative model-based annotation techniques. It is addressed with the use of binary\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40273, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 86, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 23, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 34, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 35, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 49, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 52, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 52, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 53, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 53, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 67, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 68, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 75, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 75, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 76, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 76, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 79, + "end": 85, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 79, + "end": 85, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.5735735735735735, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40273", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40273

text criminative model-based annotation techniques. It is addressed with the use of binary\n

", + "default_label": "raw_text" + }, + "116": { + "id": 116, + "task_path": "images/000001_000116_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40285.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 394, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40285, + "_line": "classifier. To anticipate the labels for unlabeled videos, an independent binary classifier\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40285, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 90, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 90, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 11, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 11, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 28, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 
35, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 39, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 39, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 40, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 40, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 60, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 61, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 80, + "end": 89, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 80, + "end": 89, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 90, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.5915915915915916, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40285", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40285

text classifier. To anticipate the labels for unlabeled videos, an independent binary classifier\n

", + "default_label": "raw_text" + }, + "117": { + "id": 117, + "task_path": "images/000001_000117_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40301.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 406, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40301, + "_line": "is used for every label. So, this study leads to video annotation using SVM [11], tag\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40301, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 86, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 3, + "end": 7, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 3, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 8, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 17, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 18, + "end": 24, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 28, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 33, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 39, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 39, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 40, + "end": 45, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 40, + "end": 45, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 46, + "end": 48, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 54, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 54, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 55, + "end": 65, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 55, + "end": 65, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 66, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 66, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 75, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 75, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 76, + "end": 81, + "name": "size", + 
"value": "9", + "is_mergeable": true + }, + { + "start": 76, + "end": 81, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 82, + "end": 85, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 82, + "end": 85, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.6096096096096096, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40301", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40301

text is used for every label. So, this study leads to video annotation using SVM [11], tag\n

", + "default_label": "raw_text" + }, + "118": { + "id": 118, + "task_path": "images/000001_000118_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40309.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 418, + "width": 259, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40309, + "_line": "ranking system [13], and semi-supervised learning method [15].\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40309, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 63, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 63, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 8, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 40, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 40, + "name": "style", + 
"value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 41, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 56, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 56, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 57, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 57, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 63, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.6276276276276276, \"width\": 0.5899772209567198, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40309", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40309

text ranking system [13], and semi-supervised learning method [15].\n

", + "default_label": "raw_text" + }, + "119": { + "id": 119, + "task_path": "images/000001_000119_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40324.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 68, + "y_top_left": 430, + "width": 330, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40324, + "_line": "In [11], the authors have trained a target classifier using the web video sets that\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40324, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 83, + "name": "indentation", + "value": "68", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 3, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 3, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 25, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 33, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 35, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 52, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 52, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 53, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 53, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 77, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 78, + "end": 82, + "name": "size", + 
"value": "9", + "is_mergeable": true + }, + { + "start": 78, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "bounding box", + "value": "{\"x_top_left\": 0.1548974943052392, \"y_top_left\": 0.6456456456456456, \"width\": 0.7517084282460137, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40324", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40324

text In [11], the authors have trained a target classifier using the web video sets that\n

", + "default_label": "raw_text" + }, + "120": { + "id": 120, + "task_path": "images/000001_000120_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40337.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 442, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40337, + "_line": "adapt the pre-learned classifiers. This method is called as an adaptive latent structured\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40337, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 89, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 89, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 6, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 33, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 38, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 38, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 39, + "end": 45, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 39, + "end": 45, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 46, + "end": 48, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 77, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 71, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 78, + "end": 88, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 78, + "end": 88, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 89, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.6636636636636637, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40337", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40337

text adapt the pre-learned classifiers. This method is called as an adaptive latent structured\n

", + "default_label": "raw_text" + }, + "121": { + "id": 121, + "task_path": "images/000001_000121_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40352.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 454, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40352, + "_line": "SVM. Here the locations of the key segment are considered as latent variables. In [14],\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40352, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 88, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 5, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 5, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 23, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 26, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 26, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 27, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 27, + "end": 30, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 34, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 35, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 60, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 67, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 61, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 68, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 79, + "end": 81, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 79, + "end": 81, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 82, + "end": 87, + "name": 
"size", + "value": "9", + "is_mergeable": true + }, + { + "start": 82, + "end": 87, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.6816816816816816, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40352", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40352

text SVM. Here the locations of the key segment are considered as latent variables. In [14],\n

", + "default_label": "raw_text" + }, + "122": { + "id": 122, + "task_path": "images/000001_000122_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40363.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 466, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40363, + "_line": "the authors presented the AVA technique, which is unsupervised leveraging video\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40363, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 80, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 4, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 4, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 25, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 29, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 29, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 30, + "end": 40, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 30, + "end": 40, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 41, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 73, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 74, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 74, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.6996996996996997, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40363", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40363

text the authors presented the AVA technique, which is unsupervised leveraging video\n

", + "default_label": "raw_text" + }, + "123": { + "id": 123, + "task_path": "images/000001_000123_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40376.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 478, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40376, + "_line": "content. This method uses the search procedure followed by mining. The video content\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40376, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 85, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 25, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 29, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 29, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 30, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 30, + "end": 36, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 76, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 71, + "end": 76, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 77, + "end": 84, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 77, + "end": 84, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.7177177177177178, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40376", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40376

text content. This method uses the search procedure followed by mining. The video content\n

", + "default_label": "raw_text" + }, + "124": { + "id": 124, + "task_path": "images/000001_000124_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40389.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 490, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40389, + "_line": "and the audio identified transcriptions are given as the queries, and the multimodal\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40389, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 84, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 4, + "end": 7, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 4, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 8, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 23, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 38, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 38, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 39, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 39, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 48, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 64, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 64, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 65, + "end": 68, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 65, + "end": 68, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 69, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 69, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.7357357357357357, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40389", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40389

text and the audio identified transcriptions are given as the queries, and the multimodal\n

", + "default_label": "raw_text" + }, + "125": { + "id": 125, + "task_path": "images/000001_000125_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40394.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 502, + "width": 132, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40394, + "_line": "search ranks the relevant videos.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40394, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 34, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 34, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 7, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 16, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 16, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 17, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 17, + "end": 25, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 33, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + 
"is_mergeable": true + }, + { + "start": 0, + "end": 34, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.7537537537537538, \"width\": 0.30068337129840544, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40394", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40394

text search ranks the relevant videos.\n

", + "default_label": "raw_text" + }, + "126": { + "id": 126, + "task_path": "images/000001_000126_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40405.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 68, + "y_top_left": 514, + "width": 330, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40405, + "_line": "Recently, semi-supervised learning is used extensively in research. It is predomi-\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40405, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 83, + "name": "indentation", + "value": "68", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 25, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 34, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 35, + "end": 37, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 42, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 54, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 54, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 55, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 55, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 67, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 68, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 73, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 71, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 74, + "end": 82, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 74, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "bounding box", + "value": "{\"x_top_left\": 0.1548974943052392, \"y_top_left\": 0.7717717717717718, \"width\": 0.7517084282460137, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40405", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40405

text Recently, semi-supervised learning is used extensively in research. It is predomi-\n

", + "default_label": "raw_text" + }, + "127": { + "id": 127, + "task_path": "images/000001_000127_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40418.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 526, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40418, + "_line": "nantly dedicated to the graph-based learning techniques. It visualizes entire data as the\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40418, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 90, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 90, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 16, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 7, + "end": 16, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 17, + "end": 19, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 17, + "end": 19, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 20, + "end": 23, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 35, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 44, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 56, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 56, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 57, + "end": 59, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 57, + "end": 59, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 60, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 60, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 77, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 71, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 78, + "end": 82, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 78, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 83, + "end": 85, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 83, + "end": 85, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 86, + "end": 89, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 86, + "end": 89, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 90, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.7897897897897898, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40418", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40418

text nantly dedicated to the graph-based learning techniques. It visualizes entire data as the\n

", + "default_label": "raw_text" + }, + "128": { + "id": 128, + "task_path": "images/000001_000128_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40430.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 538, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40430, + "_line": "graph. In this propagation-based technique, the correlation between the labels can be\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40430, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 86, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 7, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 32, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 43, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 43, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 44, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 44, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 59, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 59, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 60, + "end": 67, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 60, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 68, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 79, + "end": 82, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 79, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 83, + "end": 85, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 83, + "end": 85, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.8078078078078078, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40430", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40430

text graph. In this propagation-based technique, the correlation between the labels can be\n

", + "default_label": "raw_text" + }, + "129": { + "id": 129, + "task_path": "images/000001_000129_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40444.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 550, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40444, + "_line": "effectively combined. This correlation between the labels is utilized in two ways in the\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40444, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 89, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 89, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 26, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 26, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 27, + "end": 38, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 27, + "end": 38, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 39, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 39, + "end": 
46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 50, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 50, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 51, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 51, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 60, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 69, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 61, + "end": 69, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 70, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 70, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 76, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 76, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 77, + "end": 81, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 77, + "end": 81, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 82, + "end": 84, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 82, + "end": 84, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 85, + "end": 88, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 85, + "end": 88, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 89, + "name": 
"bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.8258258258258259, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40444", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40444

text effectively combined. This correlation between the labels is utilized in two ways in the\n

", + "default_label": "raw_text" + }, + "130": { + "id": 130, + "task_path": "images/000001_000130_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40457.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 562, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40457, + "_line": "graph based learning technique. In [15], authors have proposed a technique, named as\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40457, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 85, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 6, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 31, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 31, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 32, + "end": 34, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 40, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 35, + "end": 40, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 41, + "end": 48, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 53, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 53, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 54, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 54, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 64, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 64, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 65, + "end": 75, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 65, + "end": 75, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 76, + "end": 81, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 76, + "end": 81, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 82, + "end": 84, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 82, + "end": 84, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.8438438438438438, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40457", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40457

text graph based learning technique. In [15], authors have proposed a technique, named as\n

", + "default_label": "raw_text" + }, + "131": { + "id": 131, + "task_path": "images/000001_000131_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40464.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 574, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40464, + "_line": "optimized multigraph-based semi-supervised learning (OMG-SSL). The technique\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40464, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 77, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 77, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 26, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 26, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 27, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 27, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 62, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 76, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 76, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 77, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.8618618618618619, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40464", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40464

text optimized multigraph-based semi-supervised learning (OMG-SSL). The technique\n

", + "default_label": "raw_text" + }, + "132": { + "id": 132, + "task_path": "images/000001_000132_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40475.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 586, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40475, + "_line": "tends to tackle different deciding aspects in the video annotation simultaneously.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40475, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 83, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 6, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 15, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 16, + "end": 25, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 34, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 35, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 45, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 45, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 46, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 82, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.8798798798798799, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40475", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40475

text tends to tackle different deciding aspects in the video annotation simultaneously.\n

", + "default_label": "raw_text" + }, + "133": { + "id": 133, + "task_path": "images/000001_000133_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40486.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 53, + "y_top_left": 597, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40486, + "_line": "Multiple methods and temporal consistency are included as deciding aspects. It\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40486, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 79, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 79, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 16, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 16, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 17, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 17, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 29, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 29, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 30, + "end": 41, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 30, + "end": 41, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 42, + "end": 45, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 42, + "end": 45, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 54, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 46, + "end": 54, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 55, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 55, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 75, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 75, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 76, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 76, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 79, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.8963963963963963, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40486", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40486

text Multiple methods and temporal consistency are included as deciding aspects. It\n

", + "default_label": "raw_text" + }, + "134": { + "id": 134, + "task_path": "images/000001_000134_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_40495.jpg", + "labeled": [ + "other" + ], + "data": { + "location": { + "page_number": 3, + "bbox": { + "x_top_left": 120, + "y_top_left": 35, + "width": 278, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 40495, + "_line": "A State-of-Art Review on Automatic Video Annotation Techniques 1063\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 3, + "line_id": 40495, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 68, + "name": "indentation", + "value": "120", + "is_mergeable": true + }, + { + "start": 0, + "end": 68, + "name": "spacing", + "value": "0", + "is_mergeable": true + }, + { + "start": 0, + "end": 1, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 0, + "end": 1, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 2, + "end": 14, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 2, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 21, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 15, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 34, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 25, + "end": 34, + "name": "style", + 
"value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 40, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 35, + "end": 40, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 51, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 41, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 62, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 52, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 67, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 63, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 68, + "name": "bounding box", + "value": "{\"x_top_left\": 0.2733485193621868, \"y_top_left\": 0.052552552552552555, \"width\": 0.6332574031890661, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_40495", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 3

line_id 40495

text A State-of-Art Review on Automatic Video Annotation Techniques 1063\n

", + "default_label": "raw_text" + }, + "135": { + "id": 135, + "task_path": "images/000001_000135_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50011.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 60, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50011, + "_line": "compares to the various relationship among video units and represented by different\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50011, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 84, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "spacing", + "value": "0", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 15, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 16, + "end": 23, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 36, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 48, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 54, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 54, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 55, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 55, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 73, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 71, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 74, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 74, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.09009009009009009, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50011", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50011

text compares to the various relationship among video units and represented by different\n

", + "default_label": "raw_text" + }, + "136": { + "id": 136, + "task_path": "images/000001_000136_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50022.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 72, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50022, + "_line": "graphs. A semi-supervised annotation approach, named as leaning an optimized graph\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50022, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 83, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 8, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 25, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 36, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 46, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 52, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 52, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 53, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 53, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 63, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 64, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 64, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 76, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 76, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 77, + "end": 82, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 77, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.10810810810810811, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50022", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50022

text graphs. A semi-supervised annotation approach, named as leaning an optimized graph\n

", + "default_label": "raw_text" + }, + "137": { + "id": 137, + "task_path": "images/000001_000137_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50034.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 84, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50034, + "_line": "(OGL). The relationship between the data points can be embedded more accurately\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50034, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 80, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 10, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 7, + "end": 10, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 11, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 11, + "end": 23, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 31, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 31, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 32, + "end": 35, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 40, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 40, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 41, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 54, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 54, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 55, + "end": 63, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 55, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 64, + "end": 68, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 64, + "end": 68, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 69, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 69, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.12612612612612611, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50034", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50034

text (OGL). The relationship between the data points can be embedded more accurately\n

", + "default_label": "raw_text" + }, + "138": { + "id": 138, + "task_path": "images/000001_000138_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50048.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 96, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50048, + "_line": "from incomplete labels and multiple features in the proposed approach [17, 18]. In [19],\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50048, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 89, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 89, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 5, + "end": 15, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 5, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 22, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 16, + "end": 22, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 26, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 23, + "end": 26, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 27, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 27, + "end": 35, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 44, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 60, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 69, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 61, + "end": 69, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 70, + "end": 74, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 70, + "end": 74, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 75, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 75, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 80, + "end": 82, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 80, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 83, + "end": 88, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 83, + "end": 88, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 89, + "name": 
"bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.14414414414414414, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50048", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50048

text from incomplete labels and multiple features in the proposed approach [17, 18]. In [19],\n

", + "default_label": "raw_text" + }, + "139": { + "id": 139, + "task_path": "images/000001_000139_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50058.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 108, + "width": 287, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50058, + "_line": "the authors proposed an approach for sentence generation from videos.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50058, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 70, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 70, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 4, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 4, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 23, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 32, + "name": "style", + 
"value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 36, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 45, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 45, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 56, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 46, + "end": 56, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 57, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 57, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 69, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 69, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 70, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.16216216216216217, \"width\": 0.6537585421412301, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50058", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50058

text the authors proposed an approach for sentence generation from videos.\n

", + "default_label": "raw_text" + }, + "140": { + "id": 140, + "task_path": "images/000001_000140_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50067.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 54, + "y_top_left": 120, + "width": 330, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50067, + "_line": "Despite the productivity and intuitionistic advantages of discriminative model-\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50067, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 80, + "name": "indentation", + "value": "54", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 8, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 28, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 43, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 43, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 44, + "end": 54, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 44, + "end": 54, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 55, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 55, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12300683371298406, \"y_top_left\": 0.18018018018018017, \"width\": 0.7517084282460137, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50067", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50067

text Despite the productivity and intuitionistic advantages of discriminative model-\n

", + "default_label": "raw_text" + }, + "141": { + "id": 141, + "task_path": "images/000001_000141_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50076.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 132, + "width": 253, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50076, + "_line": "based techniques, there exist challenges, which are as follows:\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50076, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 64, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 64, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 17, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 6, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 18, + "end": 23, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 29, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 29, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 30, + "end": 41, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 30, + "end": 41, + "name": "style", + 
"value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 42, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 42, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 54, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 54, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 55, + "end": 63, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 55, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 64, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.1981981981981982, \"width\": 0.5763097949886105, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50076", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50076

text based techniques, there exist challenges, which are as follows:\n

", + "default_label": "raw_text" + }, + "142": { + "id": 142, + "task_path": "images/000001_000142_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50088.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 40, + "y_top_left": 150, + "width": 315, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50088, + "_line": "1. The interrelationship among the visual features and tags is often excluded.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": 2, + "level_2": 1, + "can_be_multiline": false, + "line_type": "list_item" + }, + "page_id": 4, + "line_id": 50088, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 79, + "name": "indentation", + "value": "40", + "is_mergeable": true + }, + { + "start": 0, + "end": 79, + "name": "spacing", + "value": "12", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 3, + "end": 6, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 3, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 7, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 30, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 34, + "name": "style", 
+ "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 41, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 35, + "end": 41, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 42, + "end": 50, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 42, + "end": 50, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 51, + "end": 54, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 51, + "end": 54, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 55, + "end": 59, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 55, + "end": 59, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 60, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 60, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 68, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 68, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 69, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 69, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 79, + "name": "bounding box", + "value": "{\"x_top_left\": 0.09111617312072894, \"y_top_left\": 0.22522522522522523, \"width\": 0.7175398633257403, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50088", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50088

text 1. The interrelationship among the visual features and tags is often excluded.\n

", + "default_label": "raw_text" + }, + "143": { + "id": 143, + "task_path": "images/000001_000143_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50102.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 40, + "y_top_left": 162, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50102, + "_line": "2. These techniques are often are receptive to the quality of training datasets, because\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": 2, + "level_2": 1, + "can_be_multiline": false, + "line_type": "list_item" + }, + "page_id": 4, + "line_id": 50102, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 89, + "name": "indentation", + "value": "40", + "is_mergeable": true + }, + { + "start": 0, + "end": 89, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 3, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 3, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 19, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 19, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 20, + "end": 23, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 29, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 29, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 30, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 30, + "end": 33, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 43, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 43, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 44, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 44, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 50, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 50, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 51, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 51, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 71, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 81, + "end": 88, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 81, + "end": 88, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 89, + "name": "bounding 
box", + "value": "{\"x_top_left\": 0.09111617312072894, \"y_top_left\": 0.24324324324324326, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50102", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50102

text 2. These techniques are often are receptive to the quality of training datasets, because\n

", + "default_label": "raw_text" + }, + "144": { + "id": 144, + "task_path": "images/000001_000144_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50109.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 53, + "y_top_left": 174, + "width": 189, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50109, + "_line": "of the predominant usage of label correlations.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50109, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 48, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 48, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 3, + "end": 6, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 3, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 7, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 27, + "name": "style", + "value": 
"OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 28, + "end": 33, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 48, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.26126126126126126, \"width\": 0.4305239179954442, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50109", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50109

text of the predominant usage of label correlations.\n

", + "default_label": "raw_text" + }, + "145": { + "id": 145, + "task_path": "images/000001_000145_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50122.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 54, + "y_top_left": 192, + "width": 330, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50122, + "_line": "The graph-based video techniques are transitive and can determine the labels of the\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50122, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 84, + "name": "indentation", + "value": "54", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "spacing", + "value": "12", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 4, + "end": 15, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 4, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 16, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 32, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 36, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 65, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 65, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 66, + "end": 69, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 66, + "end": 69, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 70, + "end": 76, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 70, + "end": 76, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 77, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 77, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 80, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 80, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12300683371298406, \"y_top_left\": 0.2882882882882883, \"width\": 0.7517084282460137, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50122", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50122

text The graph-based video techniques are transitive and can determine the labels of the\n

", + "default_label": "raw_text" + }, + "146": { + "id": 146, + "task_path": "images/000001_000146_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50134.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 204, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50134, + "_line": "unannotated video. However, this is not useful in practical applications consisting of\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50134, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 87, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 87, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 27, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 28, + "end": 32, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 35, 
+ "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 39, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 39, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 40, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 40, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 59, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 59, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 60, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 60, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 84, + "end": 86, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 84, + "end": 86, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 87, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.3063063063063063, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50134", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50134

text unannotated video. However, this is not useful in practical applications consisting of\n

", + "default_label": "raw_text" + }, + "147": { + "id": 147, + "task_path": "images/000001_000147_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50139.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 216, + "width": 131, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50139, + "_line": "the large video annotation tasks.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50139, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 34, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 34, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 4, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 4, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 15, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 26, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 16, + "end": 26, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 27, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 27, + "end": 33, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + 
"is_mergeable": true + }, + { + "start": 0, + "end": 34, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.32432432432432434, \"width\": 0.2984054669703872, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50139", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50139

text the large video annotation tasks.\n

", + "default_label": "raw_text" + }, + "148": { + "id": 148, + "task_path": "images/000001_000148_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50142.jpg", + "labeled": [ + "named_item" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 248, + "width": 159, + "height": 6 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50142, + "_line": "5 Ontology-Based Techniques\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50142, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 28, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 28, + "name": "spacing", + "value": "26", + "is_mergeable": true + }, + { + "start": 0, + "end": 1, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 0, + "end": 1, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 2, + "end": 16, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 2, + "end": 16, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 17, + "end": 27, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 17, + "end": 27, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 0, + "end": 28, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.37237237237237236, \"width\": 0.3621867881548975, \"height\": 0.009009009009009009, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50142", + "original_document": 
"1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50142

text 5 Ontology-Based Techniques\n

", + "default_label": "raw_text" + }, + "149": { + "id": 149, + "task_path": "images/000001_000149_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50155.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 273, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50155, + "_line": "Concept relationships are proven to be important learning assets that can improve the\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50155, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 86, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "spacing", + "value": "18", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 8, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 25, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 32, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 35, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 38, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 38, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 39, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 39, + "end": 48, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 64, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 64, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 65, + "end": 69, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 65, + "end": 69, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 70, + "end": 73, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 70, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 74, + "end": 81, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 74, + "end": 81, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 82, + "end": 85, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 82, + "end": 85, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.4099099099099099, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50155", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50155

text Concept relationships are proven to be important learning assets that can improve the\n

", + "default_label": "raw_text" + }, + "150": { + "id": 150, + "task_path": "images/000001_000150_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50164.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 285, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50164, + "_line": "effectiveness of video annotation. Ontology-based annotation techniques denote the\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50164, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 83, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 16, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 16, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 17, + "end": 22, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 17, + "end": 22, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 23, + "end": 34, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 35, + "end": 49, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 60, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 61, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 79, + "end": 82, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 79, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.42792792792792794, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50164", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50164

text effectiveness of video annotation. Ontology-based annotation techniques denote the\n

", + "default_label": "raw_text" + }, + "151": { + "id": 151, + "task_path": "images/000001_000151_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50175.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 297, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50175, + "_line": "theoretical representations of the semantic relationship between the feature present in\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50175, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 88, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 27, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 28, + "end": 30, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 34, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 43, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 35, + "end": 43, 
+ "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 44, + "end": 56, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 44, + "end": 56, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 57, + "end": 64, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 57, + "end": 64, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 65, + "end": 68, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 65, + "end": 68, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 69, + "end": 76, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 69, + "end": 76, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 77, + "end": 84, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 77, + "end": 84, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 85, + "end": 87, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 85, + "end": 87, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.44594594594594594, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50175", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50175

text theoretical representations of the semantic relationship between the feature present in\n

", + "default_label": "raw_text" + }, + "152": { + "id": 152, + "task_path": "images/000001_000152_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50186.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 309, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50186, + "_line": "video and labels. In object-ontology, semantics descriptors are formulated from our\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50186, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 84, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 6, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 17, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 18, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 37, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 59, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 59, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 60, + "end": 63, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 60, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 64, + "end": 74, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 64, + "end": 74, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 75, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 75, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 80, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 80, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.46396396396396394, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50186", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50186

text video and labels. In object-ontology, semantics descriptors are formulated from our\n

", + "default_label": "raw_text" + }, + "153": { + "id": 153, + "task_path": "images/000001_000153_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50198.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 321, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50198, + "_line": "daily language. These semantic descriptors from a simple vocabulary give a qualitative\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50198, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 87, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 87, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 15, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 6, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 16, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 30, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 42, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 56, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 56, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 57, + "end": 67, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 57, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 68, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 74, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 74, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 75, + "end": 86, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 75, + "end": 86, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 87, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.481981981981982, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50198", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50198

text daily language. These semantic descriptors from a simple vocabulary give a qualitative\n

", + "default_label": "raw_text" + }, + "154": { + "id": 154, + "task_path": "images/000001_000154_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50211.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 333, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50211, + "_line": "meaning of high-level queries. Video dataset can be grouped, based on the high-level\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50211, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 85, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 10, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 8, + "end": 10, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 11, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 11, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 30, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 36, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 44, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 48, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 60, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 61, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 69, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 69, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 70, + "end": 73, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 70, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 74, + "end": 84, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 74, + "end": 84, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.5, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": 
false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50211", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50211

text meaning of high-level queries. Video dataset can be grouped, based on the high-level\n

", + "default_label": "raw_text" + }, + "155": { + "id": 155, + "task_path": "images/000001_000155_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50220.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 345, + "width": 277, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50220, + "_line": "semantics (keywords) and mapping of semantic descriptors [24, 25].\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50220, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 67, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 67, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 32, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 35, + "name": "style", + 
"value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 44, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 56, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 56, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 57, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 57, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 67, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.5180180180180181, \"width\": 0.6309794988610479, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50220", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50220

text semantics (keywords) and mapping of semantic descriptors [24, 25].\n

", + "default_label": "raw_text" + }, + "156": { + "id": 156, + "task_path": "images/000001_000156_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50231.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 54, + "y_top_left": 357, + "width": 330, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50231, + "_line": "An Ontology-based structured annotation technique is proposed in [20, 23] for\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50231, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 78, + "name": "indentation", + "value": "54", + "is_mergeable": true + }, + { + "start": 0, + "end": 78, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 3, + "end": 17, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 3, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 18, + "end": 28, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 39, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 39, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 40, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 40, + "end": 49, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 52, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 52, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 53, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 53, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 64, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 64, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 65, + "end": 69, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 65, + "end": 69, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 70, + "end": 73, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 70, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 74, + "end": 77, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 74, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 78, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12300683371298406, \"y_top_left\": 0.536036036036036, \"width\": 0.7517084282460137, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50231", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50231

text An Ontology-based structured annotation technique is proposed in [20, 23] for\n

", + "default_label": "raw_text" + }, + "157": { + "id": 157, + "task_path": "images/000001_000157_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50241.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 369, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50241, + "_line": "CBVR using spatiotemporal Reasoning. These rule-based systems are efficient in\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50241, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 78, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 78, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 5, + "end": 10, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 5, + "end": 10, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 11, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 11, + "end": 25, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 36, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 42, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 53, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 53, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 54, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 54, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 65, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 65, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 66, + "end": 74, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 66, + "end": 74, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 75, + "end": 77, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 75, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 78, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.5540540540540541, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50241", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50241

text CBVR using spatiotemporal Reasoning. These rule-based systems are efficient in\n

", + "default_label": "raw_text" + }, + "158": { + "id": 158, + "task_path": "images/000001_000158_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50255.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 381, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50255, + "_line": "describing the temporal information of events and activities in videos. In [21], the web\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50255, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 89, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 89, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 11, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 11, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 23, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 35, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 38, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 
38, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 39, + "end": 45, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 39, + "end": 45, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 46, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 60, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 63, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 61, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 64, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 64, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 74, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 74, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 75, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 75, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 81, + "end": 84, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 81, + "end": 84, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 85, + "end": 88, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 85, + "end": 88, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 89, + "name": 
"bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.5720720720720721, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50255", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50255

text describing the temporal information of events and activities in videos. In [21], the web\n

", + "default_label": "raw_text" + }, + "159": { + "id": 159, + "task_path": "images/000001_000159_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50267.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 393, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50267, + "_line": "video search engine was proposed. The combination of Boolean and temporal relations\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50267, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 84, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 6, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 19, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 19, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 20, + "end": 23, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 33, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 37, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 52, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 52, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 53, + "end": 60, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 53, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 64, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 61, + "end": 64, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 65, + "end": 73, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 65, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 74, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 74, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.5900900900900901, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50267", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50267

text video search engine was proposed. The combination of Boolean and temporal relations\n

", + "default_label": "raw_text" + }, + "160": { + "id": 160, + "task_path": "images/000001_000160_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50281.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 405, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50281, + "_line": "for the automatic annotation and retrieval of video content is utilized by the ontologies\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50281, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 90, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 90, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 4, + "end": 7, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 4, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 17, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 8, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 18, + "end": 28, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 32, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 45, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 45, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 46, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 59, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 59, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 60, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 60, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 74, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 74, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 75, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 75, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 79, + "end": 89, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 79, + "end": 89, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 90, + "name": 
"bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.6081081081081081, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50281", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50281

text for the automatic annotation and retrieval of video content is utilized by the ontologies\n

", + "default_label": "raw_text" + }, + "161": { + "id": 161, + "task_path": "images/000001_000161_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50293.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 417, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50293, + "_line": "and the permitted queries. A framework for automatic semantic video annotation of\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50293, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 82, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 82, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 4, + "end": 7, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 4, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 17, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 8, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 26, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 18, + "end": 26, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 27, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 27, + "end": 28, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 38, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 38, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 39, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 39, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 52, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 52, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 53, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 53, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 67, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 68, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 79, + "end": 81, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 79, + "end": 81, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 82, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.6261261261261262, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50293", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50293

text and the permitted queries. A framework for automatic semantic video annotation of\n

", + "default_label": "raw_text" + }, + "162": { + "id": 162, + "task_path": "images/000001_000162_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50304.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 429, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50304, + "_line": "unconstrained videos is presented in [22]. This framework combines two layers,\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50304, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 79, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 79, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 23, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 33, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 36, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 71, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 79, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.6441441441441441, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50304", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50304

text unconstrained videos is presented in [22]. This framework combines two layers,\n

", + "default_label": "raw_text" + }, + "163": { + "id": 163, + "task_path": "images/000001_000163_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50313.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 441, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50313, + "_line": "namely annotation analysis using commonsense knowledgebase and low-level visual\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50313, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 80, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 17, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 7, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 26, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 18, + "end": 26, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 27, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 27, + "end": 32, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 44, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.6621621621621622, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50313", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50313

text namely annotation analysis using commonsense knowledgebase and low-level visual\n

", + "default_label": "raw_text" + }, + "164": { + "id": 164, + "task_path": "images/000001_000164_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50322.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 453, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50322, + "_line": "similarity matching. Commonsense ontology is framed by consolidating multiple-\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50322, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 79, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 79, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 11, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 11, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 32, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 41, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 41, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 42, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 42, + "end": 44, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 54, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 54, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 55, + "end": 68, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 55, + "end": 68, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 69, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 69, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 79, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.6801801801801802, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50322", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50322

text similarity matching. Commonsense ontology is framed by consolidating multiple-\n

", + "default_label": "raw_text" + }, + "165": { + "id": 165, + "task_path": "images/000001_000165_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50325.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 465, + "width": 133, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50325, + "_line": "structured semantic relationships.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50325, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 35, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 35, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 11, + "end": 19, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 11, + "end": 19, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 20, + "end": 34, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 35, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.6981981981981982, \"width\": 0.30296127562642367, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50325", + "original_document": 
"1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50325

text structured semantic relationships.\n

", + "default_label": "raw_text" + }, + "166": { + "id": 166, + "task_path": "images/000001_000166_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50338.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 54, + "y_top_left": 477, + "width": 330, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50338, + "_line": "In [24], the authors formalized the method for analyzing the content in multimedia\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50338, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 83, + "name": "indentation", + "value": "54", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 3, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 3, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 31, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 31, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 32, + "end": 35, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 56, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 56, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 57, + "end": 60, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 57, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 68, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 61, + "end": 68, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 69, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 69, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 82, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12300683371298406, \"y_top_left\": 0.7162162162162162, \"width\": 0.7517084282460137, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50338", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50338

text In [24], the authors formalized the method for analyzing the content in multimedia\n

", + "default_label": "raw_text" + }, + "167": { + "id": 167, + "task_path": "images/000001_000167_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50350.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 488, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50350, + "_line": "using the interrelationship between the low- and high-level concept descriptors. In [25],\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50350, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 90, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 90, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 6, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 27, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 28, + "end": 35, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 39, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 39, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 40, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 40, + "end": 44, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 48, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 59, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 59, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 60, + "end": 67, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 60, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 68, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 81, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 81, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 84, + "end": 89, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 84, + "end": 89, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 90, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.7327327327327328, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50350", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50350

text using the interrelationship between the low- and high-level concept descriptors. In [25],\n

", + "default_label": "raw_text" + }, + "168": { + "id": 168, + "task_path": "images/000001_000168_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50361.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 500, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50361, + "_line": "the authors demonstrated the fuzzy knowledge representations. The probability or the\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50361, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 85, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 4, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 4, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 28, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 34, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 35, + "end": 44, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 65, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 65, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 66, + "end": 77, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 66, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 78, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 78, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 81, + "end": 84, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 81, + "end": 84, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.7507507507507507, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50361", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50361

text the authors demonstrated the fuzzy knowledge representations. The probability or the\n

", + "default_label": "raw_text" + }, + "169": { + "id": 169, + "task_path": "images/000001_000169_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50372.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 512, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50372, + "_line": "degree of uncertainty is determined from these representations. The fuzzy Ontology\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50372, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 83, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 7, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 35, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 40, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 40, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 41, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 63, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 64, + "end": 67, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 64, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 73, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 68, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 74, + "end": 82, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 74, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.7687687687687688, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50372", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50372

text degree of uncertainty is determined from these representations. The fuzzy Ontology\n

", + "default_label": "raw_text" + }, + "170": { + "id": 170, + "task_path": "images/000001_000170_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50383.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 524, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50383, + "_line": "model populates the automatically generated ontology from the different video anno-\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50383, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 84, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 15, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 6, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 19, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 16, + "end": 19, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 20, + "end": 33, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 43, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 43, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 44, + "end": 52, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 44, + "end": 52, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 53, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 53, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 77, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 78, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 78, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.7867867867867868, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50383", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50383

text model populates the automatically generated ontology from the different video anno-\n

", + "default_label": "raw_text" + }, + "171": { + "id": 171, + "task_path": "images/000001_000171_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50395.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 536, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50395, + "_line": "tation datasets. The content was utilized for the improvement of video semantic\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50395, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 80, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 16, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 7, + "end": 16, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 17, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 17, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 28, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 32, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 41, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 41, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 42, + "end": 45, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 42, + "end": 45, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 46, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 64, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 64, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 65, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 65, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 71, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.8048048048048048, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50395", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50395

text tation datasets. The content was utilized for the improvement of video semantic\n

", + "default_label": "raw_text" + }, + "172": { + "id": 172, + "task_path": "images/000001_000172_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50396.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 548, + "width": 55, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50396, + "_line": "interpretation.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50396, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 16, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 16, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 15, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 16, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.8228228228228228, \"width\": 0.1252847380410023, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50396", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50396

text interpretation.\n

", + "default_label": "raw_text" + }, + "173": { + "id": 173, + "task_path": "images/000001_000173_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50408.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 54, + "y_top_left": 560, + "width": 330, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50408, + "_line": "In [26], the authors proposed a general block schematic for ontology-based video\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50408, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 81, + "name": "indentation", + "value": "54", + "is_mergeable": true + }, + { + "start": 0, + "end": 81, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 3, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 3, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 29, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 29, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 30, + "end": 31, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 30, + "end": 31, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 39, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 32, + "end": 39, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 40, + "end": 45, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 40, + "end": 45, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 46, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 59, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 59, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 60, + "end": 74, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 60, + "end": 74, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 75, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 75, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 81, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12300683371298406, \"y_top_left\": 0.8408408408408409, \"width\": 0.7517084282460137, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50408", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50408

text In [26], the authors proposed a general block schematic for ontology-based video\n

", + "default_label": "raw_text" + }, + "174": { + "id": 174, + "task_path": "images/000001_000174_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50419.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 572, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50419, + "_line": "annotation. The usage of ontology-based technique helps in the semantic annotation.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50419, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 84, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 15, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 16, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 39, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 39, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 40, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 40, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.8588588588588588, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50419", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50419

text annotation. The usage of ontology-based technique helps in the semantic annotation.\n

", + "default_label": "raw_text" + }, + "175": { + "id": 175, + "task_path": "images/000001_000175_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50432.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 584, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50432, + "_line": "Either rule learning or machine learning, both together with ontology can give better\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50432, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 86, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 7, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 23, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 31, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 31, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 41, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 32, + "end": 41, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 42, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 42, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 60, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 69, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 61, + "end": 69, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 70, + "end": 73, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 70, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 74, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 74, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 79, + "end": 85, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 79, + "end": 85, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.8768768768768769, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50432", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50432

text Either rule learning or machine learning, both together with ontology can give better\n

", + "default_label": "raw_text" + }, + "176": { + "id": 176, + "task_path": "images/000001_000176_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50443.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 596, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50443, + "_line": "annotation compared with different techniques In [27], the authors proposed a\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50443, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 78, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 78, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 11, + "end": 19, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 11, + "end": 19, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 20, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 34, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 45, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 35, + "end": 45, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 46, + "end": 48, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 54, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 54, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 55, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 55, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 75, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 75, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 76, + "end": 77, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 76, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 78, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.8948948948948949, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50443", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50443

text annotation compared with different techniques In [27], the authors proposed a\n

", + "default_label": "raw_text" + }, + "177": { + "id": 177, + "task_path": "images/000001_000177_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_50449.jpg", + "labeled": [ + "other" + ], + "data": { + "location": { + "page_number": 4, + "bbox": { + "x_top_left": 39, + "y_top_left": 35, + "width": 132, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 50449, + "_line": "1064 K. Randive and R. Mohan\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 4, + "line_id": 50449, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 29, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 29, + "name": "spacing", + "value": "0", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 5, + "end": 7, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 5, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 15, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 8, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 19, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 16, + "end": 19, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 22, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 20, + "end": 22, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + 
"is_mergeable": true + }, + { + "start": 23, + "end": 28, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 23, + "end": 28, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 29, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.052552552552552555, \"width\": 0.30068337129840544, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_50449", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 4

line_id 50449

text 1064 K. Randive and R. Mohan\n

", + "default_label": "raw_text" + }, + "178": { + "id": 178, + "task_path": "images/000001_000178_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60008.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 60, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60008, + "_line": "collaborative algorithm, which consensus-based social network analysis (SNA) for\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60008, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 81, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 81, + "name": "spacing", + "value": "0", + "is_mergeable": true + }, + { + "start": 0, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 30, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 53, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 53, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 54, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 54, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 76, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 71, + "end": 76, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 77, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 77, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 81, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.09009009009009009, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60008", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60008

text collaborative algorithm, which consensus-based social network analysis (SNA) for\n

", + "default_label": "raw_text" + }, + "179": { + "id": 179, + "task_path": "images/000001_000179_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60022.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 72, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60022, + "_line": "semantic video annotation. In this method, the videos are sorted out similar to social\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60022, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 87, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 87, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 26, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 26, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 27, + "end": 29, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 27, + "end": 29, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 30, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 30, + "end": 34, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 35, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 53, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 53, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 54, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 54, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 64, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 64, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 65, + "end": 68, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 65, + "end": 68, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 69, + "end": 76, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 69, + "end": 76, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 77, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 77, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 80, + "end": 86, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 80, + "end": 86, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 87, + "name": 
"bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.10810810810810811, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60022", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60022

text semantic video annotation. In this method, the videos are sorted out similar to social\n

", + "default_label": "raw_text" + }, + "180": { + "id": 180, + "task_path": "images/000001_000180_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60035.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 84, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60035, + "_line": "networks. In this work, the content present in the video was described semantically\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60035, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 84, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 17, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 18, + "end": 23, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 27, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 28, + "end": 35, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 43, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 43, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 44, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 44, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 50, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 50, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 51, + "end": 56, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 51, + "end": 56, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 57, + "end": 60, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 57, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 61, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 71, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.12612612612612611, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60035", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60035

text networks. In this work, the content present in the video was described semantically\n

", + "default_label": "raw_text" + }, + "181": { + "id": 181, + "task_path": "images/000001_000181_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60045.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 96, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60045, + "_line": "utilizing an ontology-based approach. Simple semantics inferred from the ontology\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60045, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 82, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 82, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 27, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 28, + "end": 37, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 44, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 54, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 54, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 55, + "end": 63, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 55, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 64, + "end": 68, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 64, + "end": 68, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 69, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 69, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 81, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 81, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 82, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.14414414414414414, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60045", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60045

text utilizing an ontology-based approach. Simple semantics inferred from the ontology\n

", + "default_label": "raw_text" + }, + "182": { + "id": 182, + "task_path": "images/000001_000182_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60060.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 108, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60060, + "_line": "works fine for the datasets consisting of a similar type of videos. However, they may\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60060, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 85, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 6, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 17, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 26, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 18, + "end": 26, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 27, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 27, + "end": 37, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 40, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 40, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 41, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 50, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 50, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 51, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 51, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 75, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 75, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 76, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 76, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 81, + "end": 84, + "name": 
"size", + "value": "9", + "is_mergeable": true + }, + { + "start": 81, + "end": 84, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.16216216216216217, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60060", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60060

text works fine for the datasets consisting of a similar type of videos. However, they may\n

", + "default_label": "raw_text" + }, + "183": { + "id": 183, + "task_path": "images/000001_000183_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60075.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 120, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60075, + "_line": "fail to annotate the dataset with different videos. So, tools that are more powerful are\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60075, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 89, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 89, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 5, + "end": 7, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 5, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 16, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 8, + "end": 16, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 17, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 17, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 28, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 33, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 43, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 43, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 44, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 44, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 75, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 71, + "end": 75, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 76, + "end": 84, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 76, + "end": 84, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 85, + "end": 88, + "name": 
"size", + "value": "9", + "is_mergeable": true + }, + { + "start": 85, + "end": 88, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 89, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.18018018018018017, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60075", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60075

text fail to annotate the dataset with different videos. So, tools that are more powerful are\n

", + "default_label": "raw_text" + }, + "184": { + "id": 184, + "task_path": "images/000001_000184_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60086.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 132, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60086, + "_line": "required to learn semantics for substantial accumulations of videos with different\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60086, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 83, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 17, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 18, + "end": 27, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 31, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 28, + "end": 31, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 43, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 32, + "end": 43, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 44, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 44, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 60, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 67, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 61, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 68, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 82, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.1981981981981982, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60086", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60086

text required to learn semantics for substantial accumulations of videos with different\n

", + "default_label": "raw_text" + }, + "185": { + "id": 185, + "task_path": "images/000001_000185_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60087.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 144, + "width": 37, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60087, + "_line": "activities.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60087, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 12, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 12, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 12, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.21621621621621623, \"width\": 0.08428246013667426, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60087", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60087

text activities.\n

", + "default_label": "raw_text" + }, + "186": { + "id": 186, + "task_path": "images/000001_000186_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60091.jpg", + "labeled": [ + "named_item" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 176, + "width": 225, + "height": 6 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60091, + "_line": "6 Deep-Learning Model-Based Techniques\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60091, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 39, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 39, + "name": "spacing", + "value": "26", + "is_mergeable": true + }, + { + "start": 0, + "end": 1, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 0, + "end": 1, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 2, + "end": 15, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 2, + "end": 15, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 16, + "end": 27, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 16, + "end": 27, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 28, + "end": 38, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 28, + "end": 38, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 0, + "end": 39, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.26426426426426425, \"width\": 
0.5125284738041003, \"height\": 0.009009009009009009, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60091", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60091

text 6 Deep-Learning Model-Based Techniques\n

", + "default_label": "raw_text" + }, + "187": { + "id": 187, + "task_path": "images/000001_000187_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60104.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 202, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60104, + "_line": "Recently, a feature representation for the video annotation is driven by deep learning-\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60104, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 88, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "spacing", + "value": "18", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 19, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 19, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 20, + "end": 34, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 38, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 35, + "end": 
38, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 39, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 39, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 48, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 59, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 59, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 60, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 60, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 69, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 69, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 70, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 70, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 77, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 78, + "end": 87, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 78, + "end": 87, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.3033033033033033, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 
666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60104", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60104

text Recently, a feature representation for the video annotation is driven by deep learning-\n

", + "default_label": "raw_text" + }, + "188": { + "id": 188, + "task_path": "images/000001_000188_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60115.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 213, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60115, + "_line": "based techniques because of the significant development in deep architectures. This\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60115, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 83, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 16, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 6, + "end": 16, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 17, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 17, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 27, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 31, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 28, + "end": 31, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 32, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 54, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 54, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 55, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 55, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 77, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 78, + "end": 82, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 78, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.31981981981981983, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60115", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60115

text based techniques because of the significant development in deep architectures. This\n

", + "default_label": "raw_text" + }, + "189": { + "id": 189, + "task_path": "images/000001_000189_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60128.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 225, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60128, + "_line": "allows various deep architectures to annotate large volume of video data available. The\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60128, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 88, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 7, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 19, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 19, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 20, + "end": 33, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 36, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 45, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 45, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 46, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 67, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 68, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 84, + "end": 87, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 84, + "end": 87, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.33783783783783783, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60128", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60128

text allows various deep architectures to annotate large volume of video data available. The\n

", + "default_label": "raw_text" + }, + "190": { + "id": 190, + "task_path": "images/000001_000190_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60140.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 237, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60140, + "_line": "techniques are divided into two categories. The Convolution neural network (CNN) can\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60140, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 85, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 11, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 11, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 22, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 22, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 23, + "end": 27, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 31, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 28, + "end": 31, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 43, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 32, + "end": 43, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 44, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 44, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 59, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 59, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 60, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 60, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 74, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 74, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 75, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 75, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 81, + "end": 84, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 81, + "end": 84, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.35585585585585583, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60140", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60140

text techniques are divided into two categories. The Convolution neural network (CNN) can\n

", + "default_label": "raw_text" + }, + "191": { + "id": 191, + "task_path": "images/000001_000191_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60154.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 249, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60154, + "_line": "be used for extracting features that are robust [28–30, 35]. Deep learning systems are\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60154, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 87, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 87, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 3, + "end": 7, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 3, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 8, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 22, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 22, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 31, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 23, + "end": 31, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 32, + "end": 36, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 40, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 40, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 41, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 60, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 65, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 61, + "end": 65, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 66, + "end": 74, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 66, + "end": 74, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 75, + "end": 82, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 75, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 83, + "end": 86, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 83, + "end": 86, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 87, + "name": 
"bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.3738738738738739, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60154", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60154

text be used for extracting features that are robust [28–30, 35]. Deep learning systems are\n

", + "default_label": "raw_text" + }, + "192": { + "id": 192, + "task_path": "images/000001_000192_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60162.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 261, + "width": 208, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60162, + "_line": "utilized for the semantic label relationship [41, 42].\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60162, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 55, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 55, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 16, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 16, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 17, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 17, + "end": 25, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 31, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 31, + "name": "style", + "value": 
"OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 32, + "end": 44, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 54, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 54, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 55, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.3918918918918919, \"width\": 0.47380410022779046, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60162", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60162

text utilized for the semantic label relationship [41, 42].\n

", + "default_label": "raw_text" + }, + "193": { + "id": 193, + "task_path": "images/000001_000193_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60174.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 68, + "y_top_left": 273, + "width": 330, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60174, + "_line": "Robust visual features are the primary component for video annotation. The con-\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60174, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 80, + "name": "indentation", + "value": "68", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 7, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 22, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 22, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 26, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 23, + "end": 26, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 27, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 27, + "end": 30, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 38, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 38, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 39, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 39, + "end": 48, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 52, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 52, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 53, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 53, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 74, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 71, + "end": 74, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 75, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 75, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "bounding box", + "value": "{\"x_top_left\": 0.1548974943052392, \"y_top_left\": 0.4099099099099099, \"width\": 0.7517084282460137, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60174", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60174

text Robust visual features are the primary component for video annotation. The con-\n

", + "default_label": "raw_text" + }, + "194": { + "id": 194, + "task_path": "images/000001_000194_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60186.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 285, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60186, + "_line": "ventional handcrafted features are not reliable for complex concepts. The recent research\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60186, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 90, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 90, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 30, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 34, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 38, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 35, + "end": 38, 
+ "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 39, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 39, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 59, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 59, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 60, + "end": 69, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 60, + "end": 69, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 70, + "end": 73, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 70, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 74, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 74, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 81, + "end": 89, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 81, + "end": 89, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 90, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.42792792792792794, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60186", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60186

text ventional handcrafted features are not reliable for complex concepts. The recent research\n

", + "default_label": "raw_text" + }, + "195": { + "id": 195, + "task_path": "images/000001_000195_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60198.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 297, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60198, + "_line": "works available generates robust visual features utilizing CNN because of the merito-\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60198, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 86, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 15, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 6, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 16, + "end": 25, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 32, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 39, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 39, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 40, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 40, + "end": 48, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 73, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 71, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 74, + "end": 77, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 74, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 78, + "end": 85, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 78, + "end": 85, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.44594594594594594, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60198", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60198

text works available generates robust visual features utilizing CNN because of the merito-\n

", + "default_label": "raw_text" + }, + "196": { + "id": 196, + "task_path": "images/000001_000196_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60211.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 309, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60211, + "_line": "rious behavior of CNN in computer vision. In [31], a multi-semantic video annotation\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60211, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 85, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 6, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 17, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 18, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 33, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 41, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 41, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 42, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 42, + "end": 44, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 50, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 50, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 51, + "end": 52, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 51, + "end": 52, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 53, + "end": 67, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 53, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 73, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 68, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 74, + "end": 84, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 74, + "end": 84, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.46396396396396394, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60211", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60211

text rious behavior of CNN in computer vision. In [31], a multi-semantic video annotation\n

", + "default_label": "raw_text" + }, + "197": { + "id": 197, + "task_path": "images/000001_000197_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60222.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 321, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60222, + "_line": "technique is proposed to represent the high-level semantic information using the\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60222, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 81, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 81, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 34, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 38, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 35, + "end": 38, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 39, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 39, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 76, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 71, + "end": 76, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 77, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 77, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 81, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.481981981981982, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60222", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60222

text technique is proposed to represent the high-level semantic information using the\n

", + "default_label": "raw_text" + }, + "198": { + "id": 198, + "task_path": "images/000001_000198_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60235.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 333, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60235, + "_line": "semantic network and to map the relationships between the content. The keyframes are\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60235, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 85, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 16, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 16, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 17, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 17, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 23, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 27, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 31, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 28, + "end": 31, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 45, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 32, + "end": 45, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 53, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 46, + "end": 53, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 54, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 54, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 71, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 81, + "end": 84, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 81, + "end": 84, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.5, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": 
false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60235", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60235

text semantic network and to map the relationships between the content. The keyframes are\n

", + "default_label": "raw_text" + }, + "199": { + "id": 199, + "task_path": "images/000001_000199_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60249.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 345, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60249, + "_line": "extracted from the video and CNN’s is used to extract low-level visual features, which\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60249, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 87, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 87, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 28, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 34, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 35, + "end": 37, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 45, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 45, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 53, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 46, + "end": 53, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 54, + "end": 63, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 54, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 64, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 64, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 71, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 81, + "end": 86, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 81, + "end": 86, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 87, + "name": 
"bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.5180180180180181, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60249", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60249

text extracted from the video and CNN’s is used to extract low-level visual features, which\n

", + "default_label": "raw_text" + }, + "200": { + "id": 200, + "task_path": "images/000001_000200_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60264.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 357, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60264, + "_line": "recognize the content in the videos. In [32], the authors have proposed a method that\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60264, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 86, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 28, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 36, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 39, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 39, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 40, + "end": 45, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 40, + "end": 45, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 46, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 73, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 74, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 74, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 81, + "end": 85, + "name": 
"size", + "value": "9", + "is_mergeable": true + }, + { + "start": 81, + "end": 85, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.536036036036036, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60264", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60264

text recognize the content in the videos. In [32], the authors have proposed a method that\n

", + "default_label": "raw_text" + }, + "201": { + "id": 201, + "task_path": "images/000001_000201_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60279.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 369, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60279, + "_line": "exhibits another way to deal with weakly labeled sequence data using the learning in a\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60279, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 87, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 87, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 16, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 16, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 17, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 17, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 23, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 28, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 33, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 40, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 40, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 41, + "end": 48, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 68, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 68, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 69, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 69, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 81, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 81, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 82, + "end": 84, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 82, + "end": 84, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 85, + "end": 86, + "name": 
"size", + "value": "9", + "is_mergeable": true + }, + { + "start": 85, + "end": 86, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 87, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.5540540540540541, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60279", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60279

text exhibits another way to deal with weakly labeled sequence data using the learning in a\n

", + "default_label": "raw_text" + }, + "202": { + "id": 202, + "task_path": "images/000001_000202_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60292.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 381, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60292, + "_line": "frame-based classifier by embedding a CNN in an iterative EM algorithm. An active\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60292, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 81, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 81, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 34, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 35, + "end": 36, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 40, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 40, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 43, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 41, + "end": 43, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 44, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 44, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 56, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 56, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 57, + "end": 59, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 57, + "end": 59, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 60, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 60, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 73, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 71, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 74, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 74, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 81, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.5720720720720721, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60292", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60292

text frame-based classifier by embedding a CNN in an iterative EM algorithm. An active\n

", + "default_label": "raw_text" + }, + "203": { + "id": 203, + "task_path": "images/000001_000203_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60306.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 393, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60306, + "_line": "learning method based on visualization is proposed in [33] for video annotation. In [34],\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60306, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 90, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 90, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 15, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 16, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 38, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 38, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 39, + "end": 41, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 39, + "end": 41, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 42, + "end": 50, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 42, + "end": 50, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 51, + "end": 53, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 51, + "end": 53, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 54, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 54, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 68, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 68, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 69, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 69, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 81, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 81, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 84, + "end": 89, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 84, + "end": 89, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 90, + "name": 
"bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.5900900900900901, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60306", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60306

text learning method based on visualization is proposed in [33] for video annotation. In [34],\n

", + "default_label": "raw_text" + }, + "204": { + "id": 204, + "task_path": "images/000001_000204_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60317.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 405, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60317, + "_line": "the authors proposed a multimodal feature learning mechanism based on Stacked\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60317, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 78, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 78, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 4, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 4, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 22, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 22, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 23, + "end": 33, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 41, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 41, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 42, + "end": 50, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 42, + "end": 50, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 51, + "end": 60, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 51, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 61, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 69, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 69, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 70, + "end": 77, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 70, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 78, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.6081081081081081, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60317", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60317

text the authors proposed a multimodal feature learning mechanism based on Stacked\n

", + "default_label": "raw_text" + }, + "205": { + "id": 205, + "task_path": "images/000001_000205_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60327.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 417, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60327, + "_line": "Contractive Autoencoder (SCAE) deep networks for automatic video annotation. In\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60327, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 80, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 23, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 30, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 35, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 44, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 48, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 64, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 64, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 65, + "end": 76, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 65, + "end": 76, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 77, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 77, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.6261261261261262, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60327", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60327

text Contractive Autoencoder (SCAE) deep networks for automatic video annotation. In\n

", + "default_label": "raw_text" + }, + "206": { + "id": 206, + "task_path": "images/000001_000206_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60340.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 429, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60340, + "_line": "[36], the proposed method is utilized to generate video representation with an emphasis\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60340, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 88, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 6, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 25, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 28, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 37, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 40, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 40, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 41, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 75, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 71, + "end": 75, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 76, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 76, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 79, + "end": 87, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 79, + "end": 87, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.6441441441441441, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60340", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60340

text [36], the proposed method is utilized to generate video representation with an emphasis\n

", + "default_label": "raw_text" + }, + "207": { + "id": 207, + "task_path": "images/000001_000207_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60349.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 441, + "width": 318, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60349, + "_line": "on temporal modeling namely Hierarchical Recurrent Neural Encoder (HRNE).\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60349, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 74, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 74, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 3, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 3, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 27, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 40, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 28, + "end": 40, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 50, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 41, + "end": 50, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 51, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 51, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 65, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 65, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 66, + "end": 73, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 66, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 74, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.6621621621621622, \"width\": 0.724373576309795, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60349", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60349

text on temporal modeling namely Hierarchical Recurrent Neural Encoder (HRNE).\n

", + "default_label": "raw_text" + }, + "208": { + "id": 208, + "task_path": "images/000001_000208_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60360.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 68, + "y_top_left": 453, + "width": 330, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60360, + "_line": "The CNN-RNN framework [40] uses Recurrent Neural Networks (RNN) with a\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60360, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 71, + "name": "indentation", + "value": "68", + "is_mergeable": true + }, + { + "start": 0, + "end": 71, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 4, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 4, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 26, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 26, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 27, + "end": 31, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 27, + "end": 31, + "name": "style", 
+ "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 41, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 32, + "end": 41, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 42, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 42, + "end": 48, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 63, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 64, + "end": 68, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 64, + "end": 68, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 69, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 69, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 71, + "name": "bounding box", + "value": "{\"x_top_left\": 0.1548974943052392, \"y_top_left\": 0.6801801801801802, \"width\": 0.7517084282460137, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60360", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60360

text The CNN-RNN framework [40] uses Recurrent Neural Networks (RNN) with a\n

", + "default_label": "raw_text" + }, + "209": { + "id": 209, + "task_path": "images/000001_000209_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60370.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 465, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60370, + "_line": "moderate level of computational complexity to identify high-order label relationships.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60370, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 87, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 87, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 17, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 31, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 18, + "end": 31, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 32, + "end": 42, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 45, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 45, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 54, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 46, + "end": 54, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 55, + "end": 65, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 55, + "end": 65, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 66, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 66, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 86, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 86, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 87, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.6981981981981982, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60370", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60370

text moderate level of computational complexity to identify high-order label relationships.\n

", + "default_label": "raw_text" + }, + "210": { + "id": 210, + "task_path": "images/000001_000210_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60381.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 477, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60381, + "_line": "The framework uses convolutional and recurrent neural network mutually used for\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60381, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 80, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 4, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 4, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 32, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 36, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 53, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 53, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 54, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 54, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 75, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 71, + "end": 75, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 76, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 76, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.7162162162162162, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60381", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60381

text The framework uses convolutional and recurrent neural network mutually used for\n

", + "default_label": "raw_text" + }, + "211": { + "id": 211, + "task_path": "images/000001_000211_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60392.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 488, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60392, + "_line": "determining the correlation between the adjacent labels and video representations. The\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60392, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 87, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 87, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 15, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 16, + "end": 27, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 28, + "end": 35, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 39, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 39, 
+ "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 40, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 40, + "end": 48, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 59, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 59, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 60, + "end": 65, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 60, + "end": 65, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 66, + "end": 82, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 66, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 83, + "end": 86, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 83, + "end": 86, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 87, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.7327327327327328, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60392", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60392

text determining the correlation between the adjacent labels and video representations. The\n

", + "default_label": "raw_text" + }, + "212": { + "id": 212, + "task_path": "images/000001_000212_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60406.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 503, + "width": 345, + "height": 3 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60406, + "_line": "final outputs are computed based on the similarities among labels. In [37, 38], a\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60406, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 81, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 81, + "name": "spacing", + "value": "8", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 5, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 5, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 16, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 16, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 17, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 17, + "end": 25, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 31, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 31, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 32, + "end": 34, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 38, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 35, + "end": 38, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 39, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 39, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 65, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 65, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 66, + "end": 68, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 66, + "end": 68, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 69, + "end": 73, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 69, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 74, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 74, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 79, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 79, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 81, + "name": "bounding 
box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.7552552552552553, \"width\": 0.785876993166287, \"height\": 0.0045045045045045045, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60406", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60406

text final outputs are computed based on the similarities among labels. In [37, 38], a\n

", + "default_label": "raw_text" + }, + "213": { + "id": 213, + "task_path": "images/000001_000213_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60416.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 512, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60416, + "_line": "combinational deep neural networks based two-stream structure is proposed. The\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60416, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 79, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 79, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 25, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 34, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 40, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 35, + "end": 40, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 41, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 64, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 64, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 65, + "end": 74, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 65, + "end": 74, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 75, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 75, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 79, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.7687687687687688, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60416", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60416

text combinational deep neural networks based two-stream structure is proposed. The\n

", + "default_label": "raw_text" + }, + "214": { + "id": 214, + "task_path": "images/000001_000214_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60428.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 524, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60428, + "_line": "structure is made out of two segments. The parallel two-stream encoding segment\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60428, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 80, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 17, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 18, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 28, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 38, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 38, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 39, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 39, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.7867867867867868, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60428", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60428

text structure is made out of two segments. The parallel two-stream encoding segment\n

", + "default_label": "raw_text" + }, + "215": { + "id": 215, + "task_path": "images/000001_000215_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60441.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 536, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60441, + "_line": "which 3D CNN and takes video encoding from different sources. The input encoded\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60441, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 80, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 6, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 16, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 16, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 17, + "end": 22, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 17, + "end": 22, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 23, + "end": 28, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 37, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 52, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 52, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 53, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 53, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 65, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 65, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 66, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 66, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.8048048048048048, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60441", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60441

text which 3D CNN and takes video encoding from different sources. The input encoded\n

", + "default_label": "raw_text" + }, + "216": { + "id": 216, + "task_path": "images/000001_000216_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60448.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 548, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60448, + "_line": "video representations are transferred using Long-Short-Term-Memory (LSTM)-based\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60448, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 80, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 21, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 6, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 22, + "end": 25, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 37, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 43, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 43, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 44, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 44, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 80, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.8228228228228228, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60448", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60448

text video representations are transferred using Long-Short-Term-Memory (LSTM)-based\n

", + "default_label": "raw_text" + }, + "217": { + "id": 217, + "task_path": "images/000001_000217_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60461.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 560, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60461, + "_line": "decoding language to the annotations. In [39], the authors have proposed a hierarchical\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60461, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 88, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 17, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 18, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 37, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 40, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 40, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 41, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 50, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 50, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 51, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 51, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 63, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 64, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 64, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 74, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 74, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 75, + "end": 87, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 75, + "end": 87, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.8408408408408409, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60461", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60461

text decoding language to the annotations. In [39], the authors have proposed a hierarchical\n

", + "default_label": "raw_text" + }, + "218": { + "id": 218, + "task_path": "images/000001_000218_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60472.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 572, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60472, + "_line": "LSTM with adjusted temporal attention (hLSTMat) approach for video annotation. In\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60472, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 82, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 82, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 5, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 5, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 27, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 28, + "end": 37, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 56, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 56, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 57, + "end": 60, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 57, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 61, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 79, + "end": 81, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 79, + "end": 81, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 82, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.8588588588588588, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60472", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60472

text LSTM with adjusted temporal attention (hLSTMat) approach for video annotation. In\n

", + "default_label": "raw_text" + }, + "219": { + "id": 219, + "task_path": "images/000001_000219_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60484.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 584, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60484, + "_line": "[43], a framework is presented which simultaneously explores the learning of LSTM\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60484, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 82, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 82, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 7, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 6, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 17, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 8, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 18, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 30, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 36, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 60, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 64, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 61, + "end": 64, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 65, + "end": 73, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 65, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 74, + "end": 76, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 74, + "end": 76, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 77, + "end": 81, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 77, + "end": 81, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 82, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.8768768768768769, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60484", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60484

text [43], a framework is presented which simultaneously explores the learning of LSTM\n

", + "default_label": "raw_text" + }, + "220": { + "id": 220, + "task_path": "images/000001_000220_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60495.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 53, + "y_top_left": 596, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60495, + "_line": "and visual-semantic embedding (LSTM-E). In [44], a deep architecture is presented,\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60495, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 83, + "name": "indentation", + "value": "53", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 4, + "end": 19, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 4, + "end": 19, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 29, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 20, + "end": 29, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 30, + "end": 39, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 30, + "end": 39, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 40, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 40, + "end": 42, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 48, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 50, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 50, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 51, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 51, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 68, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 68, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 69, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 69, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 82, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12072892938496584, \"y_top_left\": 0.8948948948948949, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60495", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60495

text and visual-semantic embedding (LSTM-E). In [44], a deep architecture is presented,\n

", + "default_label": "raw_text" + }, + "221": { + "id": 221, + "task_path": "images/000001_000221_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_60504.jpg", + "labeled": [ + "other" + ], + "data": { + "location": { + "page_number": 5, + "bbox": { + "x_top_left": 120, + "y_top_left": 35, + "width": 278, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 60504, + "_line": "A State-of-Art Review on Automatic Video Annotation Techniques 1065\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 5, + "line_id": 60504, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 68, + "name": "indentation", + "value": "120", + "is_mergeable": true + }, + { + "start": 0, + "end": 68, + "name": "spacing", + "value": "0", + "is_mergeable": true + }, + { + "start": 0, + "end": 1, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 0, + "end": 1, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 2, + "end": 14, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 2, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 21, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 15, + "end": 21, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 22, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 34, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 25, + "end": 34, + "name": "style", + 
"value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 40, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 35, + "end": 40, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 51, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 41, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 62, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 52, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 67, + "name": "size", + "value": "8", + "is_mergeable": true + }, + { + "start": 63, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 68, + "name": "bounding box", + "value": "{\"x_top_left\": 0.2733485193621868, \"y_top_left\": 0.052552552552552555, \"width\": 0.6332574031890661, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_60504", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 5

line_id 60504

text A State-of-Art Review on Automatic Video Annotation Techniques 1065\n

", + "default_label": "raw_text" + }, + "222": { + "id": 222, + "task_path": "images/000001_000222_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_70011.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 6, + "bbox": { + "x_top_left": 39, + "y_top_left": 60, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 70011, + "_line": "which combines the transferred semantic attributes gained from images and video to\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 6, + "line_id": 70011, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 83, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "spacing", + "value": "0", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 6, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 30, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 39, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 39, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 40, + "end": 50, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 40, + "end": 50, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 51, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 51, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 62, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 62, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 63, + "end": 69, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 63, + "end": 69, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 70, + "end": 73, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 70, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 74, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 74, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 80, + "end": 82, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 80, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.09009009009009009, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_70011", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 6

line_id 70011

text which combines the transferred semantic attributes gained from images and video to\n

", + "default_label": "raw_text" + }, + "223": { + "id": 223, + "task_path": "images/000001_000223_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_70017.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 6, + "bbox": { + "x_top_left": 39, + "y_top_left": 72, + "width": 187, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 70017, + "_line": "the CNN plus RNN framework (LSTM-TSA).\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 6, + "line_id": 70017, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 39, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 39, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 4, + "end": 7, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 4, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 8, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 16, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 16, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 17, + "end": 26, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 17, + "end": 26, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + 
"is_mergeable": true + }, + { + "start": 27, + "end": 38, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 27, + "end": 38, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 39, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.10810810810810811, \"width\": 0.42596810933940776, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_70017", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 6

line_id 70017

text the CNN plus RNN framework (LSTM-TSA).\n

", + "default_label": "raw_text" + }, + "224": { + "id": 224, + "task_path": "images/000001_000224_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_70029.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 6, + "bbox": { + "x_top_left": 54, + "y_top_left": 84, + "width": 330, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 70029, + "_line": "It can be inferred that the deep learning-based annotation techniques have conveyed\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 6, + "line_id": 70029, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 84, + "name": "indentation", + "value": "54", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 3, + "end": 6, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 3, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 7, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 23, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 27, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 28, + "end": 32, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 69, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 69, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 70, + "end": 74, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 70, + "end": 74, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 75, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 75, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "bounding box", + "value": "{\"x_top_left\": 0.12300683371298406, \"y_top_left\": 0.12612612612612611, \"width\": 0.7517084282460137, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_70029", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 6

line_id 70029

text It can be inferred that the deep learning-based annotation techniques have conveyed\n

", + "default_label": "raw_text" + }, + "225": { + "id": 225, + "task_path": "images/000001_000225_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_70041.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 6, + "bbox": { + "x_top_left": 39, + "y_top_left": 96, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 70041, + "_line": "both challenges and chances to AVA techniques. The late advancement and “leaps\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 6, + "line_id": 70041, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 79, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 79, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 5, + "end": 15, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 5, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 19, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 16, + "end": 19, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 20, + "end": 27, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 28, + "end": 30, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 34, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 35, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 50, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 50, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 51, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 51, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 67, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 68, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 79, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.14414414414414414, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_70041", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 6

line_id 70041

text both challenges and chances to AVA techniques. The late advancement and “leaps\n

", + "default_label": "raw_text" + }, + "226": { + "id": 226, + "task_path": "images/000001_000226_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_70054.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 6, + "bbox": { + "x_top_left": 39, + "y_top_left": 108, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 70054, + "_line": "forward” in deep learning assures to enhance the performance of video annotation on\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 6, + "line_id": 70054, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 84, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 16, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 16, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 17, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 17, + "end": 25, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 33, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 36, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 44, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 48, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 60, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 63, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 61, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 64, + "end": 69, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 64, + "end": 69, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 70, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 70, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 81, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 81, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.16216216216216217, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_70054", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 6

line_id 70054

text forward” in deep learning assures to enhance the performance of video annotation on\n

", + "default_label": "raw_text" + }, + "227": { + "id": 227, + "task_path": "images/000001_000227_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_70067.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 6, + "bbox": { + "x_top_left": 39, + "y_top_left": 120, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 70067, + "_line": "large-scale video datasets. Yet there are two drawbacks present in the deep learning-\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 6, + "line_id": 70067, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 86, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 17, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 18, + "end": 27, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 31, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 28, + "end": 31, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 32, + "end": 37, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 41, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 41, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 42, + "end": 45, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 42, + "end": 45, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 46, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 63, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 64, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 64, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 75, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 71, + "end": 75, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 76, + "end": 85, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 76, + "end": 85, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.18018018018018017, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_70067", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 6

line_id 70067

text large-scale video datasets. Yet there are two drawbacks present in the deep learning-\n

", + "default_label": "raw_text" + }, + "228": { + "id": 228, + "task_path": "images/000001_000228_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_70080.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 6, + "bbox": { + "x_top_left": 39, + "y_top_left": 132, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 70080, + "_line": "based annotation techniques. The main drawback is the decline in the efficiency of\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 6, + "line_id": 70080, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 82, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 82, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 16, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 6, + "end": 16, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 17, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 17, + "end": 28, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 32, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 37, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 37, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 38, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 38, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 53, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 53, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 54, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 54, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 64, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 64, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 65, + "end": 68, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 65, + "end": 68, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 69, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 69, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 79, + "end": 81, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 79, + "end": 81, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 82, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.1981981981981982, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_70080", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 6

line_id 70080

text based annotation techniques. The main drawback is the decline in the efficiency of\n

", + "default_label": "raw_text" + }, + "229": { + "id": 229, + "task_path": "images/000001_000229_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_70094.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 6, + "bbox": { + "x_top_left": 39, + "y_top_left": 144, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 70094, + "_line": "these video annotation techniques because of the increase in the hidden layers of the\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 6, + "line_id": 70094, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 86, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 6, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 22, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 22, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 23, + "end": 33, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 41, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 41, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 42, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 42, + "end": 44, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 48, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 60, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 64, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 61, + "end": 64, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 65, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 65, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 79, + "end": 81, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 79, + "end": 81, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 82, + "end": 85, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 82, + "end": 85, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": 
"bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.21621621621621623, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_70094", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 6

line_id 70094

text these video annotation techniques because of the increase in the hidden layers of the\n

", + "default_label": "raw_text" + }, + "230": { + "id": 230, + "task_path": "images/000001_000230_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_70105.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 6, + "bbox": { + "x_top_left": 39, + "y_top_left": 156, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 70105, + "_line": "deep neural network. The absence of complete theoretical guidance and unified\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 6, + "line_id": 70105, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 77, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 77, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 5, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 5, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 32, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 35, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 44, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 56, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 56, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 57, + "end": 65, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 57, + "end": 65, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 66, + "end": 69, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 66, + "end": 69, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 70, + "end": 76, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 70, + "end": 76, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 77, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.23423423423423423, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_70105", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 6

line_id 70105

text deep neural network. The absence of complete theoretical guidance and unified\n

", + "default_label": "raw_text" + }, + "231": { + "id": 231, + "task_path": "images/000001_000231_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_70117.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 6, + "bbox": { + "x_top_left": 39, + "y_top_left": 168, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 70117, + "_line": "structure to decide the network structure leads to uncontrolled training phase involved\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 6, + "line_id": 70117, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 88, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 12, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 12, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 13, + "end": 19, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 13, + "end": 19, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 20, + "end": 23, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 31, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 31, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 41, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 32, + "end": 41, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 42, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 42, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 50, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 50, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 51, + "end": 63, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 51, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 64, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 64, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 79, + "end": 87, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 79, + "end": 87, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.25225225225225223, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_70117", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 6

line_id 70117

text structure to decide the network structure leads to uncontrolled training phase involved\n

", + "default_label": "raw_text" + }, + "232": { + "id": 232, + "task_path": "images/000001_000232_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_70122.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 6, + "bbox": { + "x_top_left": 39, + "y_top_left": 180, + "width": 111, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 70122, + "_line": "in the deep neural network.\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 6, + "line_id": 70122, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 28, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 28, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 3, + "end": 6, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 3, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 7, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 27, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + 
"is_mergeable": true + }, + { + "start": 0, + "end": 28, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.2702702702702703, \"width\": 0.2528473804100228, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_70122", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 6

line_id 70122

text in the deep neural network.\n

", + "default_label": "raw_text" + }, + "233": { + "id": 233, + "task_path": "images/000001_000233_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_70126.jpg", + "labeled": [ + "named_item" + ], + "data": { + "location": { + "page_number": 6, + "bbox": { + "x_top_left": 39, + "y_top_left": 212, + "width": 158, + "height": 6 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 70126, + "_line": "7 Discussion and Conclusions\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 6, + "line_id": 70126, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 29, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 29, + "name": "spacing", + "value": "26", + "is_mergeable": true + }, + { + "start": 0, + "end": 1, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 0, + "end": 1, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 2, + "end": 12, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 2, + "end": 12, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 13, + "end": 16, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 13, + "end": 16, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 17, + "end": 28, + "name": "size", + "value": "11", + "is_mergeable": true + }, + { + "start": 17, + "end": 28, + "name": "style", + "value": "OAMFJI+AdvTTeeee58d9.B", + "is_mergeable": true + }, + { + "start": 0, + "end": 29, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.3183183183183183, \"width\": 
0.35990888382687924, \"height\": 0.009009009009009009, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_70126", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 6

line_id 70126

text 7 Discussion and Conclusions\n

", + "default_label": "raw_text" + }, + "234": { + "id": 234, + "task_path": "images/000001_000234_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_70139.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 6, + "bbox": { + "x_top_left": 39, + "y_top_left": 237, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 70139, + "_line": "In this paper, five types of AVA techniques are discussed regarding its framework,\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": false, + "line_type": "raw_text" + }, + "page_id": 6, + "line_id": 70139, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 82, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 82, + "name": "spacing", + "value": "18", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 3, + "end": 7, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 3, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 8, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 24, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 27, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 31, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 28, + "end": 31, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 32, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 32, + "end": 42, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 46, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 56, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 56, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 57, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 57, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 70, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 70, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 71, + "end": 81, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 71, + "end": 81, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 82, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.35585585585585583, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_70139", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 6

line_id 70139

text In this paper, five types of AVA techniques are discussed regarding its framework,\n

", + "default_label": "raw_text" + }, + "235": { + "id": 235, + "task_path": "images/000001_000235_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_70149.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 6, + "bbox": { + "x_top_left": 39, + "y_top_left": 249, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 70149, + "_line": "merits, and challenges. The deep learning-based, the discriminative model-based, and\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 6, + "line_id": 70149, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 85, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 7, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 8, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 8, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 23, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 27, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 27, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 28, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 28, + "end": 32, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 48, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 48, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 49, + "end": 52, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 49, + "end": 52, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 53, + "end": 67, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 53, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 80, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 68, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 81, + "end": 84, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 81, + "end": 84, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.3738738738738739, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_70149", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 6

line_id 70149

text merits, and challenges. The deep learning-based, the discriminative model-based, and\n

", + "default_label": "raw_text" + }, + "236": { + "id": 236, + "task_path": "images/000001_000236_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_70159.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 6, + "bbox": { + "x_top_left": 39, + "y_top_left": 261, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 70159, + "_line": "generative model-based annotation methods fall under the category of learning-based\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 6, + "line_id": 70159, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 84, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 11, + "end": 22, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 11, + "end": 22, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 23, + "end": 33, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 41, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 41, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 42, + "end": 46, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 42, + "end": 46, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 47, + "end": 52, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 47, + "end": 52, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 53, + "end": 56, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 53, + "end": 56, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 57, + "end": 65, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 57, + "end": 65, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 66, + "end": 68, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 66, + "end": 68, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 69, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 69, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.3918918918918919, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_70159", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 6

line_id 70159

text generative model-based annotation methods fall under the category of learning-based\n

", + "default_label": "raw_text" + }, + "237": { + "id": 237, + "task_path": "images/000001_000237_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_70168.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 6, + "bbox": { + "x_top_left": 39, + "y_top_left": 273, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 70168, + "_line": "techniques. In the discriminative model-based annotation technique, video annotation\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 6, + "line_id": 70168, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 85, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 33, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 45, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 45, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 56, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 46, + "end": 56, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 57, + "end": 67, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 57, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 73, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 68, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 74, + "end": 84, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 74, + "end": 84, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 85, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.4099099099099099, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_70168", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 6

line_id 70168

text techniques. In the discriminative model-based annotation technique, video annotation\n

", + "default_label": "raw_text" + }, + "238": { + "id": 238, + "task_path": "images/000001_000238_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_70180.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 6, + "bbox": { + "x_top_left": 39, + "y_top_left": 285, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 70180, + "_line": "is considered as a multi-label classification problem. Hence, the binary classifier cannot\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 6, + "line_id": 70180, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 89, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 89, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 2, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 3, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 3, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 16, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 16, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 17, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 17, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 30, 
+ "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 44, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 53, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 53, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 54, + "end": 60, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 54, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 64, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 61, + "end": 64, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 65, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 65, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 81, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 81, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 82, + "end": 88, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 82, + "end": 88, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 89, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.42792792792792794, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_70180", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 6

line_id 70180

text is considered as a multi-label classification problem. Hence, the binary classifier cannot\n

", + "default_label": "raw_text" + }, + "239": { + "id": 239, + "task_path": "images/000001_000239_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_70192.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 6, + "bbox": { + "x_top_left": 39, + "y_top_left": 297, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 70192, + "_line": "determine the relation between the existing classes. Robust visual features are extracted\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 6, + "line_id": 70192, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 90, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 90, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 22, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 22, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 23, + "end": 30, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 34, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 34, 
+ "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 35, + "end": 43, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 35, + "end": 43, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 44, + "end": 52, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 44, + "end": 52, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 53, + "end": 59, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 53, + "end": 59, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 60, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 60, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 75, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 75, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 76, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 76, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 80, + "end": 89, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 80, + "end": 89, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 90, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.44594594594594594, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_70192", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 6

line_id 70192

text determine the relation between the existing classes. Robust visual features are extracted\n

", + "default_label": "raw_text" + }, + "240": { + "id": 240, + "task_path": "images/000001_000240_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_70203.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 6, + "bbox": { + "x_top_left": 39, + "y_top_left": 309, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 70203, + "_line": "utilizing CNN in the deep learning-based techniques. They utilize alternate frame-\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 6, + "line_id": 70203, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 83, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 13, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 13, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 14, + "end": 16, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 14, + "end": 16, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 17, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 17, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 25, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 25, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 26, + "end": 40, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 26, + "end": 40, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 52, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 41, + "end": 52, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 53, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 53, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 65, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 65, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 66, + "end": 75, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 66, + "end": 75, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 76, + "end": 82, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 76, + "end": 82, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 83, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.46396396396396394, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_70203", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 6

line_id 70203

text utilizing CNN in the deep learning-based techniques. They utilize alternate frame-\n

", + "default_label": "raw_text" + }, + "241": { + "id": 241, + "task_path": "images/000001_000241_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_70216.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 6, + "bbox": { + "x_top_left": 39, + "y_top_left": 321, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 70216, + "_line": "works, such as RNN, to infer the semantic label relationship, for automatic video\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 6, + "line_id": 70216, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 82, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 82, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 7, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 19, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 19, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 22, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 20, + "end": 22, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 23, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 23, + "end": 28, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 32, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 41, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 41, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 42, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 42, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 65, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 65, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 66, + "end": 75, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 66, + "end": 75, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 76, + "end": 81, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 76, + "end": 81, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 82, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.481981981981982, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_70216", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 6

line_id 70216

text works, such as RNN, to infer the semantic label relationship, for automatic video\n

", + "default_label": "raw_text" + }, + "242": { + "id": 242, + "task_path": "images/000001_000242_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_70224.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 6, + "bbox": { + "x_top_left": 39, + "y_top_left": 333, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 70224, + "_line": "annotation. Comparatively, the distance-based similarity model-based technique fol-\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 6, + "line_id": 70224, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 84, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 11, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 12, + "end": 26, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 12, + "end": 26, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 27, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 27, + "end": 30, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 45, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 45, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 46, + "end": 56, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 46, + "end": 56, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 57, + "end": 68, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 57, + "end": 68, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 69, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 69, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 79, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 79, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.5, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_70224", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 6

line_id 70224

text annotation. Comparatively, the distance-based similarity model-based technique fol-\n

", + "default_label": "raw_text" + }, + "243": { + "id": 243, + "task_path": "images/000001_000243_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_70237.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 6, + "bbox": { + "x_top_left": 39, + "y_top_left": 345, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 70237, + "_line": "lows a two-step framework. The prediction is done based on similar videos. The\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 6, + "line_id": 70237, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 79, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 79, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 4, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 5, + "end": 6, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 5, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 15, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 7, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 26, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 16, + "end": 26, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 27, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 27, + "end": 30, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 41, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 41, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 42, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 42, + "end": 44, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 66, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 66, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 67, + "end": 74, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 67, + "end": 74, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 75, + "end": 78, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 75, + "end": 78, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 79, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.5180180180180181, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_70237", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 6

line_id 70237

text lows a two-step framework. The prediction is done based on similar videos. The\n

", + "default_label": "raw_text" + }, + "244": { + "id": 244, + "task_path": "images/000001_000244_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_70249.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 6, + "bbox": { + "x_top_left": 39, + "y_top_left": 357, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 70249, + "_line": "implementation of applications with simple semantic features can be done easily using\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 6, + "line_id": 70249, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 86, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 17, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 17, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 18, + "end": 30, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 18, + "end": 30, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 31, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 31, + "end": 35, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 42, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 42, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 43, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 43, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 60, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 64, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 61, + "end": 64, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 65, + "end": 67, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 65, + "end": 67, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 68, + "end": 72, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 68, + "end": 72, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 73, + "end": 79, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 73, + "end": 79, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 80, + "end": 85, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 80, + "end": 85, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 86, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.536036036036036, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_70249", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 6

line_id 70249

text implementation of applications with simple semantic features can be done easily using\n

", + "default_label": "raw_text" + }, + "245": { + "id": 245, + "task_path": "images/000001_000245_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_70258.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 6, + "bbox": { + "x_top_left": 39, + "y_top_left": 369, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 70258, + "_line": "Ontology-based algorithms. However, to learn more complex semantics, machine-\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 6, + "line_id": 70258, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 78, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 78, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 26, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 26, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 27, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 27, + "end": 35, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 38, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 38, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 39, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 39, + "end": 44, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 57, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 57, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 58, + "end": 68, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 58, + "end": 68, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 69, + "end": 77, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 69, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 78, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.5540540540540541, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_70258", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 6

line_id 70258

text Ontology-based algorithms. However, to learn more complex semantics, machine-\n

", + "default_label": "raw_text" + }, + "246": { + "id": 246, + "task_path": "images/000001_000246_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_70271.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 6, + "bbox": { + "x_top_left": 39, + "y_top_left": 381, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 70271, + "_line": "learning techniques are required. A decision tree is a suitable technique for video\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 6, + "line_id": 70271, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 84, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 8, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 9, + "end": 19, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 9, + "end": 19, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 20, + "end": 23, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 20, + "end": 23, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 24, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 24, + "end": 33, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 35, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 44, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 49, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 49, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 50, + "end": 52, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 50, + "end": 52, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 53, + "end": 54, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 53, + "end": 54, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 55, + "end": 63, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 55, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 64, + "end": 73, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 64, + "end": 73, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 74, + "end": 77, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 74, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 78, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 78, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.5720720720720721, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_70271", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 6

line_id 70271

text learning techniques are required. A decision tree is a suitable technique for video\n

", + "default_label": "raw_text" + }, + "247": { + "id": 247, + "task_path": "images/000001_000247_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_70284.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 6, + "bbox": { + "x_top_left": 39, + "y_top_left": 393, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 70284, + "_line": "annotation and retrieval because of the ease in use for implementation. These decision\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 6, + "line_id": 70284, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 87, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 87, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 10, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 11, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 11, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 24, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 32, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 32, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 33, + "end": 35, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 33, + "end": 35, 
+ "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 36, + "end": 39, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 36, + "end": 39, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 40, + "end": 44, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 40, + "end": 44, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 45, + "end": 47, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 45, + "end": 47, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 48, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 48, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 55, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 55, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 56, + "end": 71, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 56, + "end": 71, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 72, + "end": 77, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 72, + "end": 77, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 78, + "end": 86, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 78, + "end": 86, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 87, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.5900900900900901, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_70284", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 6

line_id 70284

text annotation and retrieval because of the ease in use for implementation. These decision\n

", + "default_label": "raw_text" + }, + "248": { + "id": 248, + "task_path": "images/000001_000248_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_70297.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 6, + "bbox": { + "x_top_left": 39, + "y_top_left": 405, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 70297, + "_line": "rules are used for intuitive mapping from low-level features to high-level concepts. It\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 6, + "line_id": 70297, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 88, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 5, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 6, + "end": 9, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 6, + "end": 9, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 10, + "end": 14, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 10, + "end": 14, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 15, + "end": 18, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 15, + "end": 18, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 19, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 19, + "end": 28, + 
"name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 36, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 41, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 41, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 42, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 42, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 60, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 60, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 61, + "end": 63, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 61, + "end": 63, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 64, + "end": 74, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 64, + "end": 74, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 75, + "end": 84, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 75, + "end": 84, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 85, + "end": 87, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 85, + "end": 87, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 88, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.6081081081081081, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + 
"is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_70297", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 6

line_id 70297

text rules are used for intuitive mapping from low-level features to high-level concepts. It\n

", + "default_label": "raw_text" + }, + "249": { + "id": 249, + "task_path": "images/000001_000249_img_bbox_txt_5108911bd628ad92134cf76f6e4d25b8_70313.jpg", + "labeled": [ + "raw_text" + ], + "data": { + "location": { + "page_number": 6, + "bbox": { + "x_top_left": 39, + "y_top_left": 417, + "width": 345, + "height": 5 + }, + "name": "", + "rotated_angle": 0.0 + }, + "order": 70313, + "_line": "can be inferred that the five type of AVA techniques relies on the different ways to\n", + "_metadata": { + "tag_hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "unknown" + }, + "hierarchy_level": { + "level_1": null, + "level_2": null, + "can_be_multiline": true, + "line_type": "raw_text" + }, + "page_id": 6, + "line_id": 70313, + "_LineMetadata__other_fields": {} + }, + "_annotations": [ + { + "start": 0, + "end": 84, + "name": "indentation", + "value": "39", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "spacing", + "value": "6", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 0, + "end": 3, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 4, + "end": 6, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 4, + "end": 6, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 7, + "end": 15, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 7, + "end": 15, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 16, + "end": 20, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 16, + "end": 20, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 21, + "end": 24, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 21, + "end": 24, + "name": 
"style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 25, + "end": 28, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 25, + "end": 28, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 29, + "end": 33, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 29, + "end": 33, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 34, + "end": 36, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 34, + "end": 36, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 37, + "end": 40, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 37, + "end": 40, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 41, + "end": 51, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 41, + "end": 51, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 52, + "end": 58, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 52, + "end": 58, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 59, + "end": 61, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 59, + "end": 61, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 62, + "end": 65, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 62, + "end": 65, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 66, + "end": 75, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 66, + "end": 75, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 76, + "end": 80, + "name": "size", + 
"value": "9", + "is_mergeable": true + }, + { + "start": 76, + "end": 80, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 81, + "end": 83, + "name": "size", + "value": "9", + "is_mergeable": true + }, + { + "start": 81, + "end": 83, + "name": "style", + "value": "OAMFJJ+AdvTT5ada87cc", + "is_mergeable": true + }, + { + "start": 0, + "end": 84, + "name": "bounding box", + "value": "{\"x_top_left\": 0.0888382687927107, \"y_top_left\": 0.6261261261261262, \"width\": 0.785876993166287, \"height\": 0.0075075075075075074, \"page_width\": 439, \"page_height\": 666}", + "is_mergeable": false + } + ], + "_uid": "txt_5108911bd628ad92134cf76f6e4d25b8_70313", + "original_document": "1708604654_915.pdf", + "color": "#00f" + }, + "additional_info": "

page_id 6

line_id 70313

text can be inferred that the five type of AVA techniques relies on the different ways to\n

", + "default_label": "raw_text" + } +} \ No newline at end of file diff --git a/docs/source/_static/add_new_structure_type/config.json b/docs/source/_static/add_new_structure_type/config.json new file mode 100644 index 00000000..a4dd74a2 --- /dev/null +++ b/docs/source/_static/add_new_structure_type/config.json @@ -0,0 +1,26 @@ +{ + "title": "Labeling", + "port": 5555, + "debug": false, + "labels": [ + { "label": "title", "color": "#f00" }, + { "label": "author", "color": "#ffa500" }, + { "label": "affiliation", "color": "#055" }, + { "label": "named_item", "color": "#5f5" }, + { "label": "raw_text", "color": "#00f"}, + { "label": "caption", "color": "#f0f" }, + { "label": "reference", "color": "#00f080" }, + { "label": "other", "color": "#0ff" } + ], + "multiclass": false, + "input_path": "tasks.json", + "image_key": ["task_path"], + "default_label_key": ["default_label"], + "task_instruction_key": ["additional_info"], + "result_key": "labeled", + "output_path": "article_classifier_TASK_ID.json", + "instruction": "Press the button with the type or number for labeling.\nTo go to the next image with saving the result press Enter or a button «Save»", + "templates_dir": "examples", + "confirm_required": false, + "sampling": "sequential" +} \ No newline at end of file diff --git a/docs/source/_static/add_new_structure_type/download_page.png b/docs/source/_static/add_new_structure_type/download_page.png new file mode 100644 index 00000000..8589a694 Binary files /dev/null and b/docs/source/_static/add_new_structure_type/download_page.png differ diff --git a/docs/source/_static/add_new_structure_type/images_creation.png b/docs/source/_static/add_new_structure_type/images_creation.png new file mode 100644 index 00000000..71861f2c Binary files /dev/null and b/docs/source/_static/add_new_structure_type/images_creation.png differ diff --git a/docs/source/_static/add_new_structure_type/labeling_page.png b/docs/source/_static/add_new_structure_type/labeling_page.png new file 
mode 100644 index 00000000..fea2bdf8 Binary files /dev/null and b/docs/source/_static/add_new_structure_type/labeling_page.png differ diff --git a/docs/source/_static/add_new_structure_type/main_page.png b/docs/source/_static/add_new_structure_type/main_page.png new file mode 100644 index 00000000..362d7b02 Binary files /dev/null and b/docs/source/_static/add_new_structure_type/main_page.png differ diff --git a/docs/source/_static/add_new_structure_type/manager_page.png b/docs/source/_static/add_new_structure_type/manager_page.png new file mode 100644 index 00000000..72dda3a8 Binary files /dev/null and b/docs/source/_static/add_new_structure_type/manager_page.png differ diff --git a/docs/source/_static/add_new_structure_type/manifest.pdf b/docs/source/_static/add_new_structure_type/manifest.pdf new file mode 100644 index 00000000..449a4b0a Binary files /dev/null and b/docs/source/_static/add_new_structure_type/manifest.pdf differ diff --git a/docs/source/_static/add_new_structure_type/structure_pipeline.png b/docs/source/_static/add_new_structure_type/structure_pipeline.png new file mode 100644 index 00000000..501c302a Binary files /dev/null and b/docs/source/_static/add_new_structure_type/structure_pipeline.png differ diff --git a/docs/source/_static/add_new_structure_type/tasks_page.png b/docs/source/_static/add_new_structure_type/tasks_page.png new file mode 100644 index 00000000..57c26923 Binary files /dev/null and b/docs/source/_static/add_new_structure_type/tasks_page.png differ diff --git a/docs/source/_static/add_new_structure_type/waiting_page.png b/docs/source/_static/add_new_structure_type/waiting_page.png new file mode 100644 index 00000000..d2f7b893 Binary files /dev/null and b/docs/source/_static/add_new_structure_type/waiting_page.png differ diff --git a/docs/source/_static/code_examples/article_classifier.pkl.gz b/docs/source/_static/code_examples/article_classifier.pkl.gz new file mode 100644 index 00000000..a2f2355b Binary files /dev/null and 
b/docs/source/_static/code_examples/article_classifier.pkl.gz differ diff --git a/docs/source/_static/code_examples/article_classifier.py b/docs/source/_static/code_examples/article_classifier.py new file mode 100644 index 00000000..7555bc0e --- /dev/null +++ b/docs/source/_static/code_examples/article_classifier.py @@ -0,0 +1,45 @@ +from typing import List, Optional + +from article_feature_extractor import ArticleFeatureExtractor + +from dedoc.data_structures.line_with_meta import LineWithMeta +from dedoc.structure_extractors.line_type_classifiers.abstract_pickled_classifier import AbstractPickledLineTypeClassifier + + +class ArticleLineTypeClassifier(AbstractPickledLineTypeClassifier): + + def __init__(self, path: str, *, config: Optional[dict] = None) -> None: + super().__init__(config=config) + self.classifier, feature_extractor_parameters = self.load("article", path) + self.feature_extractor = ArticleFeatureExtractor() + + def predict(self, lines: List[LineWithMeta]) -> List[str]: + if len(lines) == 0: + return [] + + features = self.feature_extractor.transform([lines]) + labels_probability = self.classifier.predict_proba(features) + + # set empty lines as raw_text + raw_text_id = list(self.classifier.classes_).index("raw_text") + empty_line = [line.line.strip() == "" for line in lines] + labels_probability[empty_line, :] = 0 + labels_probability[empty_line, raw_text_id] = 1 + + # work with a title + labels = [self.classifier.classes_[i] for i in labels_probability.argmax(1)] + first_non_title = 0 + for i, line in enumerate(lines): + if "Abstract" in line.line or labels[i] not in ("title", "raw_text"): + first_non_title = i + break + + # probability=1 for title before the body, probability=0 for title after body of document has begun + title_id = list(self.classifier.classes_).index("title") + labels_probability[:first_non_title, :] = 0 + labels_probability[:first_non_title, title_id] = 1 + labels_probability[first_non_title:, title_id] = 0 + + labels = 
[self.classifier.classes_[i] for i in labels_probability.argmax(1)] + assert len(labels) == len(lines) + return labels diff --git a/docs/source/_static/code_examples/article_feature_extractor.py b/docs/source/_static/code_examples/article_feature_extractor.py new file mode 100644 index 00000000..f7523bf4 --- /dev/null +++ b/docs/source/_static/code_examples/article_feature_extractor.py @@ -0,0 +1,93 @@ +import re +from collections import defaultdict +from typing import Iterator, List, Optional, Tuple + +import pandas as pd + +from dedoc.data_structures.line_with_meta import LineWithMeta +from dedoc.structure_extractors.feature_extractors.abstract_extractor import AbstractFeatureExtractor +from dedoc.structure_extractors.feature_extractors.list_features.list_features_extractor import ListFeaturesExtractor +from dedoc.structure_extractors.feature_extractors.list_features.list_utils import get_prefix +from dedoc.structure_extractors.feature_extractors.list_features.prefix.dotted_prefix import DottedPrefix +from dedoc.structure_extractors.feature_extractors.list_features.prefix.roman_prefix import RomanPrefix +from dedoc.structure_extractors.feature_extractors.utils_feature_extractor import normalization_by_min_max +from dedoc.structure_extractors.hierarchy_level_builders.utils_reg import regexps_digits_with_dots, regexps_item + + +class ArticleFeatureExtractor(AbstractFeatureExtractor): + + def __init__(self) -> None: + self.named_item_keywords = ("abstract", "introduction", "relatedwork", "conclusion", "references", "appendix", "acknowledgements") + self.caption_keywords = ("figure", "table", "listing", "algorithm") + + self.start_regexps = [ + regexps_item, # list like 1. + regexps_digits_with_dots, # lists like 1.1.1. 
or 1.1.1 + re.compile(r"^\s*\d+\s"), # digits and space after them + ] + self.list_feature_extractor = ListFeaturesExtractor(window_size=10, prefix_list=[DottedPrefix, RomanPrefix]) + + def parameters(self) -> dict: + return {} + + def fit(self, documents: List[LineWithMeta], y: Optional[List[str]] = None) -> "ArticleFeatureExtractor": + return self + + def transform(self, documents: List[List[LineWithMeta]], y: Optional[List[str]] = None) -> pd.DataFrame: + # merge matrices for all documents into one + result_matrix = pd.concat([self.__process_document(document) for document in documents], ignore_index=True) + + # sort columns names for reproducibility on different systems + features = sorted(result_matrix.columns) + return result_matrix[features].astype(float) + + def __process_document(self, lines: List[LineWithMeta]) -> pd.DataFrame: + # features for numbered items + _, list_features_df = self.list_feature_extractor.one_document(lines) + list_features_df["list_item"] = self._list_features(lines) + + # other features + features_dict = defaultdict(list) + for line in lines: + for feature_name, feature in self._one_line_features(line, len(lines)): + features_dict[feature_name].append(feature) + features_df = pd.DataFrame(features_dict) + + # features normalization + features_df["indentation"] = self._normalize_features(features_df.indentation) + features_df["font_size"] = self._normalize_features(features_df.font_size) + + # add features of 3 previous and 3 next neighbor lines + features_df = self.prev_next_line_features(features_df, 3, 3) + + # merge all features in one matrix + result_matrix = pd.concat([features_df, list_features_df], axis=1) + return result_matrix + + def _one_line_features(self, line: LineWithMeta, total_lines: int) -> Iterator[Tuple[str, int]]: + # visual features + yield "indentation", self._get_indentation(line) + yield "spacing", self._get_spacing(line) + yield "font_size", self._get_size(line) + yield "bold", self._get_bold(line) + 
bold_percent = self._get_bold_percent(line) + yield "bold_percent", bold_percent + yield "fully_bold", int(bold_percent == 1.) + + # textual features + text = line.line.lower() + text_wo_spaces = "".join(text.strip().split()) + yield "is_named_item", int(text_wo_spaces in self.named_item_keywords) + yield "is_caption", len([word for word in self.caption_keywords if word in text_wo_spaces]) + yield "digits_number", sum(c.isdigit() for c in text_wo_spaces) + yield "at_number", text_wo_spaces.count("@") + yield "is_lower", int(line.line.strip().islower()) + yield "is_upper", int(line.line.strip().isupper()) + yield from self._start_regexp(line.line, self.start_regexps) + prefix = get_prefix([DottedPrefix], line) + yield ("dotted_depth", len(prefix.numbers)) if prefix.name == DottedPrefix.name else ("dotted_depth", 0) + + # statistical features + yield "text_length", len(text.strip()) + yield "words_number", len(text.strip().split()) + yield "line_id", normalization_by_min_max(line.metadata.line_id, min_v=0, max_v=total_lines) diff --git a/docs/source/_static/code_examples/article_structure_extractor.py b/docs/source/_static/code_examples/article_structure_extractor.py new file mode 100644 index 00000000..cbb5d6cd --- /dev/null +++ b/docs/source/_static/code_examples/article_structure_extractor.py @@ -0,0 +1,67 @@ +import os +from typing import Optional + +from article_classifier import ArticleLineTypeClassifier + +from dedoc.data_structures.hierarchy_level import HierarchyLevel +from dedoc.data_structures.line_with_meta import LineWithMeta +from dedoc.data_structures.unstructured_document import UnstructuredDocument +from dedoc.structure_extractors.abstract_structure_extractor import AbstractStructureExtractor +from dedoc.structure_extractors.feature_extractors.list_features.list_utils import get_dotted_item_depth + + +class ArticleStructureExtractor(AbstractStructureExtractor): + """ + This class is used for extraction structure from English articles + """ +
document_type = "article" + + def __init__(self, *, config: Optional[dict] = None) -> None: + super().__init__(config=config) + path = os.path.abspath(os.path.dirname(__file__)) # path to the directory where the classifier weights are located + self.classifier = ArticleLineTypeClassifier(path=os.path.join(path, "article_classifier.pkl.gz"), config=self.config) + + self.named_item_keywords = ("abstract", "introduction", "related work", "conclusion", "references", "appendix", "acknowledgements") + + def extract(self, document: UnstructuredDocument, parameters: Optional[dict] = None) -> UnstructuredDocument: + predictions = self.classifier.predict(document.lines) + assert len(predictions) == len(document.lines) + + for line, line_type in zip(document.lines, predictions): + + if line_type == "title": + # for root, level_1=0, level_2=0, can_be_multiline=True + line.metadata.hierarchy_level = HierarchyLevel.create_root() + continue + + if line_type == "named_item": + # for named_item, level_1=1,2, can_be_multiline=True + line.metadata.hierarchy_level = self.__handle_named_item(line=line, prediction=line_type, level_1=1) + continue + + if line_type in ("author", "affiliation", "reference"): + line.metadata.hierarchy_level = HierarchyLevel(level_1=3, level_2=1, can_be_multiline=False, line_type=line_type) + continue + + # for caption and raw_text, level_1=None, level_2=None, can_be_multiline=True + line.metadata.hierarchy_level = HierarchyLevel.create_raw_text() + line.metadata.hierarchy_level.line_type = line_type + + return document + + def __handle_named_item(self, line: LineWithMeta, prediction: str, level_1: int) -> HierarchyLevel: + text = line.line.strip().lower() + + if text in self.named_item_keywords: + # for standard headers like Introduction, Conclusion, etc. + return HierarchyLevel(level_1=level_1, level_2=1, can_be_multiline=True, line_type=prediction) + + # get list depth for numerated headers, e.g.
1.1->2, 1.1.1->3, for lines without numeration -1 is returned + item_depth = get_dotted_item_depth(text) + + if item_depth == -1: + # for non-standard headers without numeration or continuation of multiline headers + return HierarchyLevel(level_1=level_1 + 1, level_2=1, can_be_multiline=True, line_type=prediction) + + # for headers with numeration + return HierarchyLevel(level_1=level_1, level_2=item_depth, can_be_multiline=True, line_type=prediction) diff --git a/docs/source/_static/code_examples/dedoc_add_new_structure_type_tutorial.py b/docs/source/_static/code_examples/dedoc_add_new_structure_type_tutorial.py new file mode 100644 index 00000000..16e479b1 --- /dev/null +++ b/docs/source/_static/code_examples/dedoc_add_new_structure_type_tutorial.py @@ -0,0 +1,21 @@ +from article_structure_extractor import ArticleStructureExtractor + +from dedoc import DedocManager +from dedoc.attachments_handler import AttachmentsHandler +from dedoc.converters import ConverterComposition +from dedoc.metadata_extractors import MetadataExtractorComposition, PdfMetadataExtractor +from dedoc.readers import PdfAutoReader, ReaderComposition +from dedoc.structure_constructors import StructureConstructorComposition, TreeConstructor +from dedoc.structure_extractors import StructureExtractorComposition + +manager_config = dict( + converter=ConverterComposition(converters=[]), + reader=ReaderComposition(readers=[PdfAutoReader()]), + structure_extractor=StructureExtractorComposition(extractors={ArticleStructureExtractor.document_type: ArticleStructureExtractor()}, + default_key=ArticleStructureExtractor.document_type), + structure_constructor=StructureConstructorComposition(constructors={"tree": TreeConstructor()}, default_constructor=TreeConstructor()), + document_metadata_extractor=MetadataExtractorComposition(extractors=[PdfMetadataExtractor()]), + attachments_handler=AttachmentsHandler(), +) + +manager = DedocManager(manager_config=manager_config) diff --git 
a/docs/source/_static/code_examples/train_article_line_classifier.py b/docs/source/_static/code_examples/train_article_line_classifier.py new file mode 100644 index 00000000..c8784e9b --- /dev/null +++ b/docs/source/_static/code_examples/train_article_line_classifier.py @@ -0,0 +1,58 @@ +import logging +import os +from typing import Optional + +from article_feature_extractor import ArticleFeatureExtractor + +from dedoc.config import get_config +from scripts.train.trainers.xgboost_line_classifier_trainer import XGBoostLineClassifierTrainer + + +def skip_labels(label: str) -> Optional[str]: + """ + Function for filtering `other` lines and do not train the classifier on them + """ + if label == "other": + return None + return label + + +classifier_name = "article_classifier" + +# configure path for saving a trained classifier +classifier_directory_path = os.path.join(os.path.expanduser("~"), ".cache", "dedoc", "resources", "line_type_classifiers") +os.makedirs(classifier_directory_path, exist_ok=True) +classifier_path = os.path.join(classifier_directory_path, f"{classifier_name}.pkl.gz") + +# configure paths for saving scores and features importances (this is not obligatory) +resources_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "resources")) +assert os.path.isdir(resources_path) +path_scores = os.path.join(resources_path, "benchmarks", f"{classifier_name}_scores.json") +path_feature_importances = os.path.join(resources_path, "feature_importances", f"{classifier_name}_feature_importances.xlsx") + +# features extractor for the classifier +feature_extractor = ArticleFeatureExtractor() + +# parameters of the XGBClassifier (https://xgboost.readthedocs.io/en/stable/python/python_api.html#xgboost.XGBClassifier) +classifier_parameters = dict(learning_rate=0.5, n_estimators=600, booster="gbtree", tree_method="hist", max_depth=3, colsample_bynode=0.8) + +# dedoc configuration (just in case) +config = get_config() + +# trainer initialization 
+trainer = XGBoostLineClassifierTrainer( + data_url="url with training dataset", + logger=config.get("logging", logging.getLogger()), + feature_extractor=feature_extractor, + path_out=classifier_path, + path_scores=path_scores, + path_features_importances=path_feature_importances, + tmp_dir="/tmp", + label_transformer=skip_labels, + classifier_parameters=classifier_parameters, + random_seed=42, + config=config, +) + +# run training of the classifier +trainer.fit(cross_val_only=False, save_errors_images=False, no_cache=False) diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 3dd5a20a..f530b4f9 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -1,16 +1,31 @@ Changelog ========= +v2.1 (2024-03-05) +----------------- +Release note: `v2.1 `_ + +* Custom loggers deleted (the common logger is used for all dedoc classes). +* Do not change the document image if it has a correct orientation (orientation correction function changed). +* Use only `PdfTabbyReader` during detection of a textual layer in PDF files. +* Code related to the labeling mode refactored and removed from the library package (it is located in the separate directory). +* Added `BoldAnnotation` for words in `PdfImageReader`. +* More benchmarks are added: images of tables parsing, postprocessing of Tesseract OCR. +* Some fixes are made in a web-form of Dedoc. +* Tutorial how to add a new structure type to Dedoc added. +* Parsing of EML and HTML files fixed. + + v2.0 (2023-12-25) ----------------- Release note: `v2.0 `_ -* Fix table extraction from PDF using empty config (see `issue `_) -* Add more benchmarks for Tesseract -* Fix extension extraction for file names with several dots +* Fix table extraction from PDF using empty config (see `issue `_). +* Add more benchmarks for Tesseract. +* Fix extension extraction for file names with several dots. 
* Change names of some methods and their parameters for all main classes (attachments extractors, converters, readers, metadata extractors, structure extractors, structure constructors). - Please look to the `Package reference` of `documentation `_ for more details -* Add `AttachAnnotation` and `TableAnnotation` to `PPTX` (see `discussion `_) + Please look to the `Package reference` of `documentation `_ for more details. +* Add `AttachAnnotation` and `TableAnnotation` to `PPTX` (see `discussion `_). * Fix bugs in `DOCX` handling (see issues `378 `_, `379 `_ v1.1.1 (2023-11-24) diff --git a/docs/source/conf.py b/docs/source/conf.py index 0215a4b5..cd56e3cf 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -4,6 +4,7 @@ # -- Path setup -------------------------------------------------------------- +sys.path.insert(0, os.path.abspath(os.path.join("..", "..", "labeling"))) sys.path.insert(0, os.path.abspath(os.path.join("..", ".."))) @@ -23,6 +24,8 @@ "sphinx.ext.napoleon", "sphinx.ext.viewcode", "sphinx_rtd_theme", + "sphinx_copybutton", + "sphinx_togglebutton", "linuxdoc.rstFlatTable" ] exclude_patterns = [] diff --git a/docs/source/getting_started/usage.rst b/docs/source/getting_started/usage.rst index 9164721b..781d5ef9 100644 --- a/docs/source/getting_started/usage.rst +++ b/docs/source/getting_started/usage.rst @@ -3,7 +3,8 @@ Dedoc usage tutorial Suppose you've already have dedoc library installed. Otherwise :ref:`dedoc_installation` may be useful. -You can use dedoc as an application, see :ref:`dedoc_api` for more information. +.. seealso:: + You can use dedoc as an application, see :ref:`dedoc_api` for more information. If you have installed dedoc using pip, you can use different parts of :ref:`dedoc workflow` separately. 
@@ -37,16 +38,18 @@ we can convert it using :meth:`~dedoc.converters.DocxConverter.convert` method: :language: python :lines: 14 -To get the information about available converters, their methods and parameters see :ref:`dedoc_converters`. -The supported document formats that can be converted to another formats (which can be parsed by readers) are enlisted in the table :ref:`table_formats`. +.. seealso:: + To get the information about available converters, their methods and parameters see :ref:`dedoc_converters`. + The supported document formats that can be converted to another formats (which can be parsed by readers) are enlisted in the table :ref:`table_formats`. .. _using_readers: Using readers ------------- -To get the information about available readers, their methods and parameters see :ref:`dedoc_readers`. -The supported document formats that can be handled by readers are enlisted in the table :ref:`table_formats`. +.. seealso:: + To get the information about available readers, their methods and parameters see :ref:`dedoc_readers`. + The supported document formats that can be handled by readers are enlisted in the table :ref:`table_formats`. Let's consider an example of using readers. Assume we need to parse file :download:`example.docx <../_static/code_examples/test_dir/example.docx>`, which looks like follows: @@ -207,8 +210,10 @@ To extract metadata, one can add them to the document using :meth:`~dedoc.metada :lines: 66-70 As we see, the attribute ``metadata`` has been filled with some metadata fields. -The list of common fields for any metadata extractor along with the specific fields -for different document formats are enlisted in :ref:`dedoc_metadata_extractors`. + +.. seealso:: + The list of common fields for any metadata extractor along with the specific fields + for different document formats are enlisted in :ref:`dedoc_metadata_extractors`. 
Using attachments extractors ---------------------------- @@ -240,7 +245,8 @@ As we see, attachment extractors return the same list of :class:`~dedoc.data_str as in the attribute ``attachments`` of the :class:`~dedoc.data_structures.UnstructuredDocument`, that we can get via readers (see :ref:`using_readers`). -See :ref:`dedoc_attachments_extractors` to get more information about available extractors, their methods and parameters. +.. seealso:: + :ref:`dedoc_attachments_extractors` contains more information about available extractors, their methods and parameters. .. _using_structure_extractors: @@ -260,8 +266,9 @@ Let's extract the default structure based on the document styles: As we see, the ``hierarchy_level`` has been filled. -See :ref:`other_structure` for more details about the default document structure. -Use :ref:`dedoc_structure_extractors` to get the information about available structure extractors, their methods and parameters. +.. seealso:: + See :ref:`other_structure` for more details about the default document structure. + Use :ref:`dedoc_structure_extractors` to get the information about available structure extractors, their methods and parameters. Using structure constructors @@ -286,8 +293,9 @@ The main difference is in the ``content`` attribute, that contains hierarchical To get more information about :class:`~dedoc.data_structures.ParsedDocument`, :class:`~dedoc.data_structures.DocumentContent` and other classes, that form the output format, see :ref:`dedoc_data_structures`. -See :ref:`dedoc_structure_constructors` for the description of available structure constructors and structure types. -The description of :ref:`API output JSON format ` also may be useful. +.. seealso:: + See :ref:`dedoc_structure_constructors` for the description of available structure constructors and structure types. + The description of :ref:`API output JSON format ` also may be useful. 
Run the whole pipeline diff --git a/docs/source/index.rst b/docs/source/index.rst index b8ab6264..a69d157c 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -53,6 +53,10 @@ The resulting output of any reader is a class :class:`~dedoc.data_structures.Uns See :ref:`readers' annotations ` and :ref:`readers' line types ` to get more details about information that can be extracted by each available reader. +.. seealso:: + Dedoc supports handling of a fixed list of document formats, but the list can be expanded by new handlers. + A tutorial how to add a new document format to Dedoc is :ref:`here `. + .. _table_formats: .. list-table:: Supported documents formats and the reader's output @@ -195,7 +199,10 @@ Dedoc allows to extract structure from the documents of some specific domains. For this purpose classifiers are used to predict the type of each document line/paragraph. Then some rules (mostly based on regular expressions) are used to find a hierarchy level of each line for the document tree representation. -It's possible to define a new structure extractor in order to handle documents of new domains. +.. seealso:: + It's possible to define a new structure extractor in order to handle documents of new domains. + A tutorial how to add a new structure type to Dedoc is :ref:`here `. + Currently the following domains can be handled: * Russian laws (:ref:`structure description `). @@ -212,17 +219,14 @@ For a document of unknown or unsupported domain there is an option to use defaul getting_started/installation getting_started/usage - -.. toctree:: - :maxdepth: 1 - parameters/parameters .. 
toctree:: :maxdepth: 1 :caption: Tutorials - tutorials/add_new_doc_type + tutorials/add_new_doc_format + tutorials/add_new_structure_type tutorials/creating_document_classes diff --git a/docs/source/structure_types/law.rst b/docs/source/structure_types/law.rst index 5b83a314..2534123f 100644 --- a/docs/source/structure_types/law.rst +++ b/docs/source/structure_types/law.rst @@ -27,7 +27,7 @@ We consider documents that have some common template: In the dedoc library two law types are separated and for each type specific structure is extracted: -* **Siple law**. The structure type for all Russian law types except order (приказ), state (положение) and instruction (инструкция) we will call simple law. +* **Simple law**. The structure type for all Russian law types except order (приказ), state (положение) and instruction (инструкция) we will call simple law. * **Foiv law**. The structure type for law types order (приказ), state (положение) and instruction (инструкция) we will call foiv law. The dedoc library allows to determine law type (simple law, foiv law) automatically during document analysis. diff --git a/docs/source/tutorials/add_new_doc_type.rst b/docs/source/tutorials/add_new_doc_format.rst similarity index 97% rename from docs/source/tutorials/add_new_doc_type.rst rename to docs/source/tutorials/add_new_doc_format.rst index c8a0d1dd..a1e6ec1a 100644 --- a/docs/source/tutorials/add_new_doc_type.rst +++ b/docs/source/tutorials/add_new_doc_format.rst @@ -1,7 +1,9 @@ -Adding support for a new document type to Dedoc -=============================================== +.. _add_document_format: -Suppose you need to add support for a new type "newtype". +Adding support for a new document format to Dedoc +================================================= + +Suppose you need to add support for a new format "newtype". 
Several ways of document processing exist: * **Converter** - you can write a converter from one document format to another; @@ -200,7 +202,7 @@ You can use the reader in your code: Adding the implemented handlers to the manager config ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -All implemented document handlers are linked to dedoc in `dedoc/manager_config.py `_ +All implemented document handlers are linked to dedoc in `dedoc/manager_config.py `_ You do not have to edit this file. Create your own ``manager_config`` with dedoc handlers you need and diff --git a/docs/source/tutorials/add_new_structure_type.rst b/docs/source/tutorials/add_new_structure_type.rst new file mode 100644 index 00000000..0970d86c --- /dev/null +++ b/docs/source/tutorials/add_new_structure_type.rst @@ -0,0 +1,119 @@ +.. _add_structure_type: + +Adding support for a new structure type to Dedoc +================================================ + +Dedoc supports several structure types, e.g. Russian :ref:`laws ` or :ref:`technical specifications ` +according to the description of the :ref:`structure extraction step `. +However, one may need to add support for their own structure type, e.g. for English documents. + +It's possible to define a new structure extractor in order to handle documents of new domains. +There are two main ways of creating a structure extractor that are used in Dedoc: + + * Usage of domain-specific rules and some heuristics to detect lines types and their hierarchy (see :ref:`heuristic_structure_extractor`). + * Document lines classification with machine learning and post-processing using domain-specific rules (see :ref:`classification_structure_extractor`). + +.. note:: + A custom structure extractor should fill ``hierarchy_level`` (:class:`~dedoc.data_structures.HierarchyLevel`) + field of :class:`~dedoc.data_structures.LineMetadata` for each document line (:class:`~dedoc.data_structures.LineWithMeta`). 
+ See :ref:`add_structure_type_hierarchy_level` and documentation of the :class:`~dedoc.data_structures.HierarchyLevel` class + to get more information about lines hierarchy levels. + +While implementing a custom structure extractor, there are no limits about its architecture and used methods. +The two aforesaid methods are only examples of how the document structure can be parsed. +As an alternative, a structure extractor can automatically detect subtypes of some structure and then call more specific structure extractors. +For instance, there are `simple law` and `foiv law` subtypes in :ref:`law structure type `. +Still, the rule about filling ``hierarchy_level`` field remains obligatory as it is the result of the structure extraction process. + +.. _heuristic_structure_extractor: + +Implementing structure extractor using heuristic rules +------------------------------------------------------ + +If there are strict rules describing a document structure, it's possible to get by only with regular expressions +while implementing a new structure extractor. +A code example of such a structure extractor can be found +`here `_ +-- this is a structure extractor for the :ref:`default structure type ` in Dedoc. + +.. seealso:: + The description of details related to the ``hierarchy_level`` can be found :ref:`here `. + +There are some useful tips for creation of a heuristic structure extractor: + + * Some readers can extract annotations about visual formatting that can be useful if some structure has strict formatting rules. + Annotations are stored in the ``annotations`` attribute of each document line (:class:`~dedoc.data_structures.LineWithMeta`). + Information about readers possibilities is given in :ref:`readers_annotations`. + + * Some readers can extract preliminary lines types and their levels with the help of internal content provided by the format. + Information about readers possibilities to extract line types is given in :ref:`readers_line_types`.
+ Note that some readers extract list items using regular expressions that can differ from a custom domain, + only :class:`~dedoc.readers.DocxReader` and HTML-based readers extract list items with their levels without using regular expressions. + + * The information about hierarchy level given in :ref:`hierarchy level description ` and + documentation of the :class:`~dedoc.data_structures.HierarchyLevel` class can help in filling ``hierarchy_level`` field. + + * Hierarchy level fields ``level_1`` and ``level_2`` may be ``None`` for lines with types ``raw_text`` and ``unknown``. + In this case, during the structure construction step, these lines will become leaves of a document tree (they have the least importance). + + + +.. _classification_structure_extractor: + +Implementing structure extractor using lines classification +----------------------------------------------------------- + +.. warning:: + We plan to change structure extraction pipeline in the future to make it more user-friendly and easy-to-use. + Still, this process isn't fast, therefore here we give actual information about implementing new structure extractors. + +In Dedoc, several structure extractors use a typical pipeline for retrieving line types and levels. +This pipeline is visualized and its main parts are described below. + +.. _structure_pipeline_image: + +.. figure:: ../_static/add_new_structure_type/structure_pipeline.png + :width: 500 + + Pipeline for a structure extractor with lines classification + +The structure extraction pipeline uses ML lines classification so a training dataset should be created. +For classification, different models can be used, but our experience showed the most suitable model -- +`XGBClassifier `_. +In case of using XGBClassifier, it is necessary to build a feature matrix for feeding a classifier. +After implementing features extraction step, a classifier is trained on the gathered and labeled data.
+As any ML model, the classifier can make mistakes -- they may be fixed afterwards along with hierarchy level determination. +All these nuances form following steps for adding a new structure extractor: + + 1. **Dataset creation.** The step includes data gathering, their preparation for annotating (labeling) and the labeling itself. + We propose an `external labeling system `_ for document lines labeling. + The detailed description of this step is given in :ref:`add_structure_type_dataset_creation`. + + 2. **Features extraction.** The step is intended for making a feature matrix for the subsequent training of a classifier. + For each document line a feature vector is formed based on line's text, appearance, position inside the document, etc. + Moreover, some context can be used -- features of the adjacent lines, statistics for the entire document (e.g. mean font size). + This step is described in :ref:`add_structure_type_features_extraction`. + + 3. **Lines classification.** This step is connected with training a lines classifier, + selection of optimal hyperparameters and analysis of features importance for classifier decisions. + The step is described in :ref:`add_structure_type_lines_classification`. + + 4. **Post-processing of results.** During this step, some rules are utilized to correct classifier's predictions. + Also, hierarchy level (``level_1``, ``level_2``) should be filled according to the predicted types and specific domain rules. + The step is described in :ref:`add_structure_type_post_processing`. + + 5. **Adding a new structure extractor to the main pipeline.** + :ref:`Here ` we give an example how to use the implemented custom extractor in Dedoc. + +**The following pages may be useful for further reading:** + +.. 
toctree:: + :maxdepth: 1 + + add_new_structure_type/dataset_creation + add_new_structure_type/features_extraction + add_new_structure_type/lines_classification + add_new_structure_type/post_processing + add_new_structure_type/conclusion + add_new_structure_type/hierarchy_level + add_new_structure_type/classes diff --git a/docs/source/tutorials/add_new_structure_type/classes.rst b/docs/source/tutorials/add_new_structure_type/classes.rst new file mode 100644 index 00000000..ca074348 --- /dev/null +++ b/docs/source/tutorials/add_new_structure_type/classes.rst @@ -0,0 +1,50 @@ +Main classes related to the structure extraction via lines classification +========================================================================= + +Here we describe the main classes used for features extraction from textual lines, +line classifier training and classifier usage for lines' types prediction. + +Features extraction +------------------- + +.. autoclass:: dedoc.structure_extractors.feature_extractors.abstract_extractor.AbstractFeatureExtractor + :members: + :private-members: + +Training utilities +------------------ + +.. autoclass:: scripts.train.trainers.data_loader.DataLoader + :members: + :special-members: __init__ + +.. autoclass:: scripts.train.trainers.dataset.LineClassifierDataset + :members: + :special-members: __init__ + +.. autoclass:: scripts.train.trainers.base_sklearn_line_classifier.BaseClassifier + :show-inheritance: + :members: + +.. autoclass:: scripts.train.trainers.base_sklearn_line_classifier.BaseSklearnLineClassifierTrainer + :members: + :private-members: + :special-members: __init__ + +.. autoclass:: scripts.train.trainers.xgboost_line_classifier_trainer.XGBoostLineClassifierTrainer + :show-inheritance: + :members: + :private-members: + +.. autoclass:: scripts.train.trainers.errors_saver.ErrorsSaver + :members: + +Classification +-------------- + +..
autoclass:: dedoc.structure_extractors.line_type_classifiers.abstract_line_type_classifier.AbstractLineTypeClassifier + :members: + +.. autoclass:: dedoc.structure_extractors.line_type_classifiers.abstract_pickled_classifier.AbstractPickledLineTypeClassifier + :show-inheritance: + :members: diff --git a/docs/source/tutorials/add_new_structure_type/conclusion.rst b/docs/source/tutorials/add_new_structure_type/conclusion.rst new file mode 100644 index 00000000..ce53533e --- /dev/null +++ b/docs/source/tutorials/add_new_structure_type/conclusion.rst @@ -0,0 +1,27 @@ +.. _add_structure_type_conclusion: + +Adding implemented extractor to the main pipeline +================================================= + +.. seealso:: + Before doing this step, implementation of a structure extractor should be done. + :ref:`Here ` there is an instruction how to add a new structure extractor. + +All implemented document handlers are linked in `dedoc/manager_config.py `_ +You may edit this file or create your own ``manager_config`` with dedoc handlers you need and +your custom handlers directly in your code. + +Let's consider an example. +In the :ref:`tutorial` about structure extractor implementation, +we implemented the class ``ArticleStructureExtractor`` for handling English scientific articles in PDF format. +Example of a manager config with PDF handlers and the implemented structure extractor: + +.. literalinclude:: ../../_static/code_examples/dedoc_add_new_structure_type_tutorial.py + :language: python + +After initialization of ``manager``, it can be used for document parsing, +``document_type`` parameter is equal to the ``ArticleStructureExtractor.document_type`` attribute. + +.. 
code-block:: python + + result = manager.parse(file_path=file_path, parameters={"document_type": "article"}) diff --git a/docs/source/tutorials/add_new_structure_type/dataset_creation.rst b/docs/source/tutorials/add_new_structure_type/dataset_creation.rst new file mode 100644 index 00000000..83e6a937 --- /dev/null +++ b/docs/source/tutorials/add_new_structure_type/dataset_creation.rst @@ -0,0 +1,302 @@ +.. _add_structure_type_dataset_creation: + +Dataset creation +================ + +In this section, the terms ``labeling`` and ``annotating`` are considered as equivalent. + +Functionality, described below, is available after the whole source code was downloaded. +It can be done in different ways, e.g. downloading a `zip archive `_ or cloning the repository: + + .. code-block:: bash + + git clone https://github.com/ispras/dedoc + +The utilities for dataset creation are located in the ``labeling`` directory of the project root. + +.. note:: + In this tutorial, we suppose that the user has a Unix-based operating system with installed `python3.9+` and `Docker`. + +We propose the following sequence of actions to create a training dataset for a new document domain: + + 1. **Task description**. We consider a line classification task, therefore, in this step, + it is necessary to enlist possible line types for the new domain. + For this purpose, we use a file with task description ``manifest.pdf``, which can be shared with other annotators. + + 2. **Tasker adding**. In this step, a new tasker is integrated into the labeling system. + Taskers are utilized to make tasks for an `external labeling system `_, + where annotators will annotate the data. + Taskers take as an input parsed document lines, make images for annotating and link lines with created images. + As a result, annotated images are equivalent to annotated document lines. + + 3. **Tasks creation**. To create tasks for annotating, one should run our labeling API and upload documents of a desired domain. 
+ Firstly, uploaded documents are parsed with Dedoc, secondly, tasks for annotating are created by a tasker according to the chosen task type. + After this step, everything is ready for data annotation. + + 4. **Data labeling**. The annotating process is mostly executed in the `external labeling system `_, + after which the results of several tasks labeling are merged into one dataset. + +After dataset creation, the result can be uploaded into some cloud storage and used for training by a classifier. +Below we describe the steps in detail using the example domain of English scientific articles. + + +Make task description +--------------------- + +First of all, the list of line types should be determined. +Each type should be described in order to clearly understand how to annotate lines. +Some concrete examples of lines in documents can clarify the line types description. +Also, it is necessary to choose a name for each line type: it is recommended to use names formatted like Python variable names. +As a result, the file ``manifest.pdf`` should be made for further work. + +In our case (we consider the domain of English scientific articles), ``manifest.pdf`` can look like this: +:download:`manifest.pdf <../../_static/add_new_structure_type/manifest.pdf>`. +This file contains listing and description of the main line types along with some visual examples. +We highlighted the following line types: + + * Title + * Author + * Affiliation + * Named item + * Caption + * Reference + * Raw text + * Other + +For each type, the description with examples is given. Additionally, some unclear cases are explained. + +It is also recommended to save a file in editable format for fixing some mistakes or adding clarifications, for example, in ODT format. +The file may be empty, if there is only one annotator who knows the task perfectly. +Still, we recommend to describe the annotation rules just in case.
+ +Add tasker +---------- + +Task creation consists mainly of two steps: document lines parsing by Dedoc and tasks creation based on the parsed lines. +Generally speaking, a single task consists of document lines with meta-information from Dedoc, +images for annotating linked to the document lines, and original documents uploaded for tasks creation. + +Taskers are special classes for making tasks: they take as an input parsed document lines, +make images for annotating and link lines to the created images. +Linkage a line to an image allows annotators to see visual representation of a document during annotation process. +It is useful since a lot of information about line type is connected with line's appearance. + +For example, if we need to label each line in a document image, tasker may generate images for labeling of each line like this: + +.. figure:: ../../_static/add_new_structure_type/images_creation.png + :width: 800 + + +In the step of adding a tasker, we can configure task creation. +It is better to work with the source code directly, similarly adding new code to the existing one. + +1. Add auxiliary files ``manifest.pdf`` and ``config.json`` for a labeling system to the project resources: the subdirectory of the directory ``labeling/resources``. + + * The file ``manifest.pdf`` was made on the previous step. + * The file ``config.json`` contains settings for our labeling system: which names for labels to use, + which colors to use to draw bounding boxes for each line type, etc. + The description of ``config.json`` is given in the README file `here `_. + The most important part of the file is ``labels`` key -- labels should have the same name as described in ``manifest.pdf``. 
+ The example of configuration file for article labeling: :download:`config.json <../../_static/add_new_structure_type/config.json>` + + In our example, let's call the subdirectory ``article``, + then the files ``manifest.pdf`` and ``config.json`` should be located in ``labeling/resources/article``. + +2. Add new tasker to the ``taskers`` dictionary in the ``labeling/train_dataset/api/api.py``: + + .. code-block:: python + + taskers = { + "law_classifier": LineLabelTasker( + path2lines=path2lines, + path2docs=get_path_original_documents(config), + manifest_path=os.path.join(train_resources_path, "law", "manifest.pdf"), + config_path=os.path.join(train_resources_path, "law", "config.json"), + tmp_dir=UPLOAD_FOLDER, + progress_bar=progress_bar, + item2label=lambda t: label2label_law.get(t["_metadata"]["hierarchy_level"]["line_type"], "raw_text"), + config=config), + "article_classifier": LineLabelTasker( + path2lines=path2lines, + path2docs=get_path_original_documents(config), + manifest_path=os.path.join(train_resources_path, "article", "manifest.pdf"), + config_path=os.path.join(train_resources_path, "article", "config.json"), + tmp_dir=UPLOAD_FOLDER, + progress_bar=progress_bar, + item2label=lambda t: label2label_article.get(t["_metadata"]["hierarchy_level"]["line_type"], "raw_text"), + config=config), + ... + } + + ``LineLabelTasker`` -- a default class for making tasks for each document line. + It is not obligatory to use ``LineLabelTasker`` class, + e.g. there is ``FilteredLineLabelTasker`` that creates tasks only for pages containing headers from the document ToC. + It's possible to implement a custom tasker class for specific needs. + + Note: ``item2label`` parameter is used for mapping **Dedoc line types → line types for annotating prompt** (defined in the manifest). 
+ Dedoc saves classified document lines (:class:`~dedoc.data_structures.UnstructuredDocument`) with some detected types + (according to the ``document_type`` API parameter) - these types may differ from the manifest types. + Line types extracted by Dedoc may be used as initial prompt for annotators (e.g. when we already have a structure extractor and want to improve it). + + The mapping for our example with articles: suppose we have basic structure extractor for articles, + and it returns the same types as defined in manifest -- dictionary contains identity mapping. + + .. code-block:: python + + label2label_article = {t: t for t in ("title", "named_item", "author", "affiliation", "caption", "reference", "raw_text", "other")} + + According to the initialization of the ``item2label`` parameter, if the detected line type wasn't found in the ``label2label_article`` dictionary, + the line will be marked as ``raw_text`` by default. During annotation process, annotators will fix line types from ``raw_text`` into correct one. + We choose ``raw_text`` as it's the most frequent type, but in other cases, another type can be used. + +3. Add new type of the task to the API form. It should be done as tasks will be created via API (see the next step). + + Let's consider our example with articles. + In the previous step, we added a new key "article_classifier" to the ``taskers`` dictionary. + This key is a name of the "type_of_task" parameter in the API form. + To add a new option for this parameter, "article_classifier" should be added to the enum of ``TrainDatasetParameters`` in the ``labeling/train_dataset/api/api.py``: + + .. 
code-block:: python + + @dataclass + class TrainDatasetParameters(QueryParameters): + type_of_task: Optional[str] = Form("law_classifier", + enum=[ + "law_classifier", "tz_classifier", "diploma_classifier", "header_classifier", "paragraph_classifier", + "tables_classifier", "article_classifier" + ], + description="Type of the task to create") + task_size: Optional[str] = Form("250", description="Maximum number of images in one task") + + Also, "article_classifier" should be added to the HTML file ``labeling/train_dataset/api/web/form_input_archive.html``: + + .. code-block:: html + +

+ +

+ +Everything is ready for running our task-creation server. + +Create tasks using API +---------------------- + +To run the application, go to the project root (``dedoc`` directory) and use the following command: + + .. code-block:: bash + + docker-compose -f labeling/docker-compose.yml up --build + +Or you can run API from the ``labeling`` directory: + + .. code-block:: bash + + cd labeling + docker-compose up --build + + +By default, API is available on ``localhost:1232``. Let's go to the page in a browser. + +.. figure:: ../../_static/add_new_structure_type/main_page.png + :width: 800 + +The main page contains a basic description of a task creation process. +According to the text, we need to create an archive with all files we want to include to the training dataset and annotate. +These files will be parsed by Dedoc (so their formats should be in the :ref:`table_formats`), +and then a tasker will create tasks (document formats should be supported by the chosen tasker). + +.. note:: + At the current moment, taskers support images creation only for DOCX, TXT, PDF files and images. + Please create `an issue `_, if you need to make tasks for other document formats. + +Let's go to the link in the third item of the first step: + +.. figure:: ../../_static/add_new_structure_type/tasks_page.png + :width: 500 + +Most of the configured parameters are related to the files parsing by Dedoc (:ref:`dedoc_api`). +New parameters: + + * **Size of the task** -- number of images to annotate in one task; + * **type_of_task** -- tasker used for tasks creation. + +For our example, we configure a language (English), type of the task (article). +Then we should form the archive and choose it in the form. +We upload :download:`archive.zip <../../_static/add_new_structure_type/archive.zip>` and press the ``Upload`` button. + +.. figure:: ../../_static/add_new_structure_type/waiting_page.png + :width: 800 + +The opened page contains link where ready tasks will appear after creation. 
+The user can run the creation of several tasks and save links to download ready tasks afterwards. + +After tasks have been created, the link will lead to the following page: + +.. figure:: ../../_static/add_new_structure_type/download_page.png + :width: 800 + +After the archive with tasks was downloaded and unpacked, there are the following files inside: + + * ``original_documents.zip`` -- original files from the input documents for labeling; + * several ``task_{uid}.zip`` -- files with tasks for one person to annotate (each file contains no more than ``Size of the task`` images); + * ``task_manager.py``, ``formInput.html``, ``formResult.html`` -- are used to run a server for tasks distribution and gathering. + +To run the server, go to the directory where these files are located and run the commands: + + .. code-block:: bash + + pip3 install Flask==2.0.3 + python3 task_manager.py + +Then, go to ``localhost:3000`` and look at the page: + +.. figure:: ../../_static/add_new_structure_type/manager_page.png + :width: 400 + +It isn't obligatory to run an application for tasks management. It may be useful when there are several annotators who label data. +If you don't want to run an application, annotate tasks from each file ``task_{uid}.zip`` and save the resulting ``json`` files. +They can be merged into one dataset using ``_merge_labeled()`` function of ``task_manager.py``. + +In case of running application, tasks can be downloaded by each user using "Get the task" button. +After labeling (see the next step) the resulting ``json`` files are uploaded via "Upload results" button. +When all tasks are labeled, the information will appear that there's nothing to annotate. +The resulting merged dataset can be downloaded using "Get results" button. + + +Label dataset +------------- + +In the previous step, we got several ``task_{uid}.zip`` -- these files are tasks for annotating. +To annotate the task, you should do the following: + +1.
Unpack the archive and go to the formed directory ``task_{uid}``. +2. Run the container with the `labeling application `_: + + .. code-block:: bash + + bash run.sh + +3. Open ``localhost:5555`` in your browser: + + .. figure:: ../../_static/add_new_structure_type/labeling_page.png + :width: 800 + + The labeling process is quite intuitive; see the README file of the `repository `_ for more details. + When all images in the task are labeled, the download button will appear, + and the labeling results will be stored in a ``json`` file like :download:`result.json <../../_static/add_new_structure_type/article_classifier_000000_UX6.json>`. + +Merging resulting ``json`` files is described in the previous step. + +The resulting dataset should contain: + * ``labeled.json`` -- merged annotated ``json`` files with information about lines and their labels; + * ``original_documents`` -- directory with original documents, i.e. the input documents for labeling, they may be useful for reprocessing by Dedoc of a newer version. diff --git a/docs/source/tutorials/add_new_structure_type/features_extraction.rst b/docs/source/tutorials/add_new_structure_type/features_extraction.rst new file mode 100644 index 00000000..584710b3 --- /dev/null +++ b/docs/source/tutorials/add_new_structure_type/features_extraction.rst @@ -0,0 +1,102 @@ +.. _add_structure_type_features_extraction: + +Features extraction +=================== + +According to the :ref:`description of the pipeline for structure extraction`, +we should define how to extract a feature matrix for a given document. +In a feature matrix, each column represents one feature, and each row corresponds to one document line. +Thus, having a feature matrix for a document, we can feed it to the lines classifier and get lines' types. + +To get a feature matrix for a certain document domain, one should implement a specific class -- +an inheritor of the :class:`~dedoc.structure_extractors.feature_extractors.abstract_extractor.AbstractFeatureExtractor`. 
+This class contains a list of useful methods that can be tips for features extracting for a new domain. + +The new custom domain-specific feature extractor should be an inheritor of the :class:`~dedoc.structure_extractors.feature_extractors.abstract_extractor.AbstractFeatureExtractor` +and, therefore, implement all its abstract methods. The most important method is +:meth:`~dedoc.structure_extractors.feature_extractors.abstract_extractor.AbstractFeatureExtractor.transform`, +that is utilized for transforming document lines into a feature matrix. + +Example: implementation of a feature extractor for English articles +------------------------------------------------------------------- + +In the :ref:`dataset creation tutorial`, +we used an example domain of English articles. +Here we continue this example and implement ``ArticleFeatureExtractor``. + +Let's implement the basic methods of the parent class: + + * :meth:`~dedoc.structure_extractors.feature_extractors.abstract_extractor.AbstractFeatureExtractor.parameters` -- + we don't plan to use any parameters in the ``__init__`` method, so an empty dictionary can be returned; + + * :meth:`~dedoc.structure_extractors.feature_extractors.abstract_extractor.AbstractFeatureExtractor.fit` -- + we don't need to train our feature extractor, so the method can be empty; + + * :meth:`~dedoc.structure_extractors.feature_extractors.abstract_extractor.AbstractFeatureExtractor.transform` -- + here we implement a basic scheme of features extraction from each document and their concatenation. + +.. literalinclude:: ../../_static/code_examples/article_feature_extractor.py + :language: python + :lines: 30-42 + +To find the most suitable features for a feature extractor, it's better to look into the data first. +The example :download:`archive.zip <../../_static/add_new_structure_type/archive.zip>` with few articles +can suggest using specific regular expressions or keywords. 
+In our example, the dataset is too small to consider all possible variants adequately. +In real life, we recommend at least 30 various documents of the domain to be included to the training data. + +From the given data, we can conclude frequently used keywords and patterns in named items and captions. +This forms some regular expressions and predefined lists of keywords: + +.. literalinclude:: ../../_static/code_examples/article_feature_extractor.py + :language: python + :lines: 19-27 + +Let's implement a method that makes a feature vector for one document line. +We suggest the following groups of features to extract for each line: + + * **visual features** -- information about document formatting, e.g., spacing between two lines, + font size and boldness, etc. Several methods of formatting extraction are available in + the class :class:`~dedoc.structure_extractors.feature_extractors.abstract_extractor.AbstractFeatureExtractor`, + that may be used in a custom feature extractor. + + * **textual features** -- features based on domain-specific regular expressions, keywords, + and other information about the text. + + * **statistical features** -- some statistical information, e.g. line number in the document, its length, etc. + +Let's implement the method that extracts the aforesaid groups of features: + +.. literalinclude:: ../../_static/code_examples/article_feature_extractor.py + :language: python + :lines: 67-93 + +There are other potentially useful tips: + + * In documents of some domains, we may encounter numerated items. + To add features related to the numeration, we can use the existing functionality: + ``ListFeaturesExtractor`` class and :meth:`~dedoc.structure_extractors.feature_extractors.abstract_extractor.AbstractFeatureExtractor._list_features` method. + + * Values of some features may vary depending on the document, but their relative value can be important. + For example, lines with a bigger font size may be important structure units. 
+ In this case, normalization can help, :meth:`~dedoc.structure_extractors.feature_extractors.abstract_extractor.AbstractFeatureExtractor._normalize_features` method. + + * We can use contextual nature of the data. + For this purpose, we can add features of previous or next lines to the features of the current line. + +To finish implementation of the feature extractor for articles, let's add a method of feature extraction for one document: + +.. literalinclude:: ../../_static/code_examples/article_feature_extractor.py + :language: python + :lines: 44-65 + +The file with the feature extractor should be placed in ``dedoc/structure_extractors/feature_extractors``. +The resulting file with the feature extractor for English articles (our example) can be downloaded +:download:`here <../../_static/code_examples/article_feature_extractor.py>`. + +Or you may copy the code below. + +.. toggle:: + + .. literalinclude:: ../../_static/code_examples/article_feature_extractor.py + :language: python diff --git a/docs/source/tutorials/add_new_structure_type/hierarchy_level.rst b/docs/source/tutorials/add_new_structure_type/hierarchy_level.rst new file mode 100644 index 00000000..1f94aea6 --- /dev/null +++ b/docs/source/tutorials/add_new_structure_type/hierarchy_level.rst @@ -0,0 +1,64 @@ +.. _add_structure_type_hierarchy_level: + +Hierarchy level for document lines +================================== + +According to the documentation of the class :class:`~dedoc.data_structures.HierarchyLevel`, +it defines the level of the document line :class:`~dedoc.data_structures.LineWithMeta`. +The lower is its value, the more important the line is. + +Hierarchy level is used in the intermediate representation of the document :class:`~dedoc.data_structures.UnstructuredDocument`. +In this representation, document content is a flat list of lines. +Hierarchy level of each line adds some hierarchy to this flat list. 
+ +:class:`~dedoc.data_structures.LineMetadata` of :class:`~dedoc.data_structures.LineWithMeta` contains two types of hierarchy levels: + + * **tag_hierarchy_level** -- information extracted by readers using information provided by a specific format. + When it is impossible to get such information, ``tag_hierarchy_level=HierarchyLevel.unknown``. + * **hierarchy_level** -- information added by structure extractors according to some document domain. + Hierarchy levels may be custom, but it is recommended to use :meth:`~dedoc.data_structures.HierarchyLevel.create_root` for + the most important line (if any) and :meth:`~dedoc.data_structures.HierarchyLevel.create_raw_text` for the least important lines. + +:class:`~dedoc.data_structures.HierarchyLevel` is used when a structure constructor (:class:`~dedoc.structure_constructors.AbstractStructureConstructor`) +converts :class:`~dedoc.data_structures.UnstructuredDocument` into :class:`~dedoc.data_structures.ParsedDocument`. +The class :class:`~dedoc.structure_constructors.TreeConstructor` compares hierarchy levels of lines, and based on the result, +builds a document tree: the lower the hierarchy level is, the closer the line will be to the document root, and vice versa. + +A hierarchy level contains two levels: ``level_1`` and ``level_2``. +These levels can be freely set to any integer or ``None``; the levels will be compared by a tree constructor as tuples: + +.. code-block:: python + + (first.level_1, first.level_2) < (second.level_1, second.level_2) + +Thus, ``level_1`` defines the primary importance, e.g. for defining the type of a line: + +* Document headings → ``level_1=1`` +* Document list items → ``level_1=2`` +* Other lines → ``level_1=None`` + +While ``level_2`` may be used to arrange lines of equal type such as nested lists or multi-level headings: + +* `1. 
list item` → ``level_2=1`` +* `1.1 nested list item` → ``level_2=2`` +* `1.2 another nested item` → ``level_2=2`` +* `1.2.1.1 more deeply nested item` → ``level_2=4`` + +or + +* Heading 1 → ``level_2=1`` +* Heading 2 → ``level_2=2`` + +Thus, when we meet lines `1. list item` with level=(2, 1) and `Heading 2` with level=(1, 2), +the heading line is more important, because (1, 2) < (2, 1). + +Lines with ``None`` values are considered the least important: ``None`` is treated as +∞ when we compare level tuples. +For example, raw text lines (``HierarchyLevel.raw_text``) have ``level_1=None`` and ``level_2=None``, they become leaves in the document tree. + +Another thing that a tree structure constructor does is merging lines into one tree node. +If the ``can_be_multiline`` property is ``True``, it means that the line can be merged with other neighbor lines +that have equal hierarchy level (level_1, level_2) and type (line_type). +Also, for lines with type ``HierarchyLevel.list_item``, an empty parent node with type ``list`` will be created. + +.. warning:: + Some details of how a hierarchy level and a tree structure constructor work may be changed in the future. diff --git a/docs/source/tutorials/add_new_structure_type/lines_classification.rst b/docs/source/tutorials/add_new_structure_type/lines_classification.rst new file mode 100644 index 00000000..5c2015ea --- /dev/null +++ b/docs/source/tutorials/add_new_structure_type/lines_classification.rst @@ -0,0 +1,142 @@ +.. _add_structure_type_lines_classification: + +Lines classification +==================== + +.. seealso:: + Classifier training is performed on the custom dataset described in :ref:`add_structure_type_dataset_creation`. + Before classification, :ref:`add_structure_type_features_extraction` should be done to feed the classifier. + +Here we describe two steps: + 1. Writing and running a training script to get weights of a trained classifier. + 2. Adding a classifier class to the code of Dedoc for further usage. 
+ +Classifier training +------------------- + +Training a line classifier is quite typical, training scripts for different domains can be found in ``scripts/train``. +But during the process of adjusting the extracted features and classifier's hyperparameters +for enhancing classifier's accuracy, a lot of problems may be encountered. + +There are several classes, that are used during training, in particular, for analysis of a training process: + +* :class:`~scripts.train.trainers.data_loader.DataLoader` downloads data from cloud and transforms json-lines into Python classes. +* :class:`~scripts.train.trainers.dataset.LineClassifierDataset` represents dataset of document lines in form of a feature matrix. +* :class:`~scripts.train.trainers.base_sklearn_line_classifier.BaseClassifier` is a wrapper of `XGBClassifier `_. +* :class:`~scripts.train.trainers.base_sklearn_line_classifier.BaseSklearnLineClassifierTrainer` -- base class for training :class:`~scripts.train.trainers.base_sklearn_line_classifier.BaseClassifier`. +* :class:`~scripts.train.trainers.xgboost_line_classifier_trainer.XGBoostLineClassifierTrainer` is a trainer for `XGBClassifier `_ with ability to save importance of dataset features. +* :class:`~scripts.train.trainers.errors_saver.ErrorsSaver` is used for saving line classifier’s errors during training. + +With this functionality, one may train classifiers of document lines, +analyse the most important features for classifiers, +visualize and analyse errors made by classifiers. +After the analysis, training data or feature extraction process may be changed +in order to improve classification results. 
+ +Example: training a classifier for English articles +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To train a classifier for English articles, we will use the class :class:`~scripts.train.trainers.xgboost_line_classifier_trainer.XGBoostLineClassifierTrainer` -- +please read the documentation of the class (and its base class) to learn its main parameters. + +Below there is a code of trainer initialization and running a training process. +As a result, the trained classifier will be saved in ``classifier_path``, +its scores and importance of used features -- in ``path_scores`` and ``path_feature_importances`` correspondingly. + +.. literalinclude:: ../../_static/code_examples/train_article_line_classifier.py + :language: python + :lines: 42-58 + + +The ``label_transformer`` parameter allows to rename labels of the initial labeled data. +It may be useful when we want to merge two labels into one, or to ignore some labels completely -- in this case, the label_transformer should return ``None``. +In our example, let's ignore lines that have a label ``other``. The rest lines will have their labels unchanged. + +.. literalinclude:: ../../_static/code_examples/train_article_line_classifier.py + :language: python + :lines: 11-17 + +We also need to do path configuration for saving resulting files: + +.. literalinclude:: ../../_static/code_examples/train_article_line_classifier.py + :language: python + :lines: 20-31 + +Now we have almost everything we need, let's initialize the remaining parameters required by a trainer: + +.. literalinclude:: ../../_static/code_examples/train_article_line_classifier.py + :language: python + :lines: 33-40 + +To run the script without errors, one should provide ``data_url`` parameter to download the training dataset. +Please, see training scripts in ``scripts/train`` where urls to the existing datasets are provided to get some examples. 
+Obtaining a training dataset is described in :ref:`add_structure_type_dataset_creation` in more detail. + +Thus, writing a training script is finished. The file with the script should be placed in the ``scripts/train`` directory. +The full training script may be downloaded :download:`here <../../_static/code_examples/train_article_line_classifier.py>`, +or you may copy the code below. + +.. toggle:: + + .. literalinclude:: ../../_static/code_examples/train_article_line_classifier.py + :language: python + + +Classifier adding +----------------- + +Since we trained a new classifier and saved its weights, we can use it in Dedoc for inference. +To do this, an inheritor of the :class:`~dedoc.structure_extractors.line_type_classifiers.abstract_pickled_classifier.AbstractPickledLineTypeClassifier` should be implemented. +In this class, the :meth:`~dedoc.structure_extractors.line_type_classifiers.abstract_line_type_classifier.AbstractLineTypeClassifier.predict` method is the most important part. +This method is used for classification of the list of input :class:`~dedoc.data_structures.LineWithMeta` related to one document. +As a result, the list of line types is obtained. + +The commonplace pipeline in the :meth:`~dedoc.structure_extractors.line_type_classifiers.abstract_line_type_classifier.AbstractLineTypeClassifier.predict` method works as follows: + + * Forming a feature matrix for the input list of document lines using a feature extractor; + * Calling a classifier's prediction method (class probabilities may also be obtained); + * If needed, some post-processing of probabilities can be done (optional). + +Example: implementation of a line classifier for English articles +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In the :ref:`features extraction tutorial`, +we used an example domain of English articles and implemented ``ArticleFeatureExtractor``. +Here we continue this example and implement ``ArticleLineTypeClassifier``. 
+ +In the ``__init__`` method of the class, let's load classifier weights and initialize a feature extractor: + +.. literalinclude:: ../../_static/code_examples/article_classifier.py + :language: python + :lines: 11-14 + +Let's implement the aforesaid commonplace pipeline in the ``predict`` method. + +Forming a feature matrix: + +.. literalinclude:: ../../_static/code_examples/article_classifier.py + :language: python + :lines: 20 + +Calling a classifier's prediction method: + +.. literalinclude:: ../../_static/code_examples/article_classifier.py + :language: python + :lines: 21 + +Executing some custom post-processing of the predicted classes probabilities: + +.. literalinclude:: ../../_static/code_examples/article_classifier.py + :language: python + :lines: 23-43 + +The file with the line classifier should be placed in ``dedoc/structure_extractors/line_type_classifiers``. +The resulting file with the classifier for English articles (our example) can be downloaded +:download:`here <../../_static/code_examples/article_classifier.py>`. + +Or you may copy the code below. + +.. toggle:: + + .. literalinclude:: ../../_static/code_examples/article_classifier.py + :language: python diff --git a/docs/source/tutorials/add_new_structure_type/post_processing.rst b/docs/source/tutorials/add_new_structure_type/post_processing.rst new file mode 100644 index 00000000..fa57355e --- /dev/null +++ b/docs/source/tutorials/add_new_structure_type/post_processing.rst @@ -0,0 +1,69 @@ +.. _add_structure_type_post_processing: + +Lines post-processing +===================== + +.. seealso:: + The :ref:`description ` of the whole pipeline for structure extraction using classification. + Before post-processing, :ref:`add_structure_type_lines_classification` should be done. + +The last step of the structure extraction pipeline is post-processing and filling lines' hierarchy levels. 
+For this purpose, one should implement a structure extractor -- an inheritor of the class :class:`~dedoc.structure_extractors.AbstractStructureExtractor`. + +Classifiers of line types sometimes make mistakes that need to be fixed, and some lines may need to be split or merged -- all this may be done during post-processing. +A specific logic can be described during implementation of a method :meth:`~dedoc.structure_extractors.AbstractStructureExtractor.extract`. +The main purpose of a structure extractor is to fill the ``hierarchy_level`` attribute of the :class:`~dedoc.data_structures.LineMetadata` class. +:ref:`add_structure_type_hierarchy_level` and documentation of the :class:`~dedoc.data_structures.HierarchyLevel` may help +in implementation of a custom structure extractor. + + +Example: implementation of a structure extractor for English articles +--------------------------------------------------------------------- + +In the :ref:`lines classification tutorial`, +we used an example domain of English articles and implemented ``ArticleLineTypeClassifier``. +Here we continue this example and implement ``ArticleStructureExtractor``. + +In the ``__init__`` method of the class, let's initialize a classifier and define domain-specific keywords: + +.. literalinclude:: ../../_static/code_examples/article_structure_extractor.py + :language: python + :lines: 19-24 + +Then we should define the :meth:`~dedoc.structure_extractors.AbstractStructureExtractor.extract` method and fill the ``hierarchy_level`` +attribute according to the following assumptions: + +* `title` lines will be merged and form a document root; +* `named_item` lines are children of the root, they may be hierarchical if there is a numeration of article chapters; +* `author`, `affiliation`, `reference` are less important than `named_item` and `title` but more important than the remaining lines. 
+ +With the help of the :ref:`description ` of hierarchy level peculiarities, +we decided to use the following values of ``level_1`` and ``level_2`` of :class:`~dedoc.data_structures.HierarchyLevel`: + +* `title` → ``level_1=0, level_2=0`` +* `named_item` → ``level_1=1,2, level_2=1,2,3...`` +* `author`, `affiliation`, `reference` → ``level_1=3, level_2=1`` +* other line types → ``level_1=None, level_2=None`` + +The implementation of the :meth:`~dedoc.structure_extractors.AbstractStructureExtractor.extract` method can be the following: + +.. literalinclude:: ../../_static/code_examples/article_structure_extractor.py + :language: python + :lines: 26-49 + +.. note:: + In the example, we didn't fix any mistakes of the classifier in order to simplify the code. + In reality, a lot of post-processing work should be done in order to get good results of structure parsing. + +The whole code of the ``ArticleStructureExtractor`` can be found below. + +The file with the structure extractor should be placed in ``dedoc/structure_extractors/concrete_structure_extractors``. +The resulting file with the structure extractor for English articles (our example) can be downloaded +:download:`here <../../_static/code_examples/article_structure_extractor.py>`. + +Or you may copy the code below. + +.. toggle:: + + .. literalinclude:: ../../_static/code_examples/article_structure_extractor.py + :language: python diff --git a/docs/source/tutorials/creating_document_classes.rst b/docs/source/tutorials/creating_document_classes.rst index 83043466..f29c3ba3 100644 --- a/docs/source/tutorials/creating_document_classes.rst +++ b/docs/source/tutorials/creating_document_classes.rst @@ -54,6 +54,8 @@ etc. Some parts of the document (for example title) may take more than one line. To union them set ``can_be_multiline`` to `True` and then copy ``level_1``, ``level_2`` and ``line_type`` from the first line to others. +Look to the :ref:`hierarchy level description ` to get more details. 
+ Define metadata with :class:`~dedoc.data_structures.LineMetadata`: .. literalinclude:: ../_static/code_examples/dedoc_creating_dedoc_document.py diff --git a/examples/README.md b/examples/README.md deleted file mode 100644 index d64c2e29..00000000 --- a/examples/README.md +++ /dev/null @@ -1,91 +0,0 @@ -# Examples of document parsing with Dedoc - -This directory contains files with examples of using Dedoc to process different types of documents. - -It's worth starting with the `example_manager_input.py` file. It describes examples of document processing using the `DedocManager` class. -This is the easiest way, since this class automatically determines the format of the file being processed and calls the necessary readers. - -As shown in corresponding examples, you can create this manager with following lines: -```python -from dedoc import DedocManager - -manager = DedocManager() -``` -And after that you can get parsed document with one simple line, just replace `"your_file_name"` with the path to your chosen file: -```python -parsed_document = manager.parse(file_path="your_file_name") -``` -To get more information, look at [Dedoc usage tutorial](https://dedoc.readthedocs.io/en/latest/getting_started/usage.html). - -If you want to call a specific parser, you can look at some examples in this directory. File `example_doc_parser.py` shows how to use `DocxReader`, -`example_pdf_parser.py` shows examples with PDF file parsing. In order to parse image-like file you can call `PdfImageReader` like it's shown in -`example_img_parser.py`. - -You can check an example like this: -```shell -cd examples -python3 create_structured_document.py -``` - -## Running API examples in a docker container - -You can look at the example of using a post-request to parse documents while Dedoc container is working. -This example is written in `example_post.py`. 
- - -### Work with a docker container - -You may use your own container with Dedoc, if there is one, otherwise it may be downloaded with the following command: - -```shell -export VERSION_TAG=v0.1 -docker pull dedocproject/dedoc:VERSION_TAG -``` - -`VERSION_TAG` - tag of the library release, e.g. `v0.1`. - - -Run the Dedoc container: - -```shell -docker run dedocproject/dedoc:VERSION_TAG -``` - -If you want to use Dedoc outside the container (locally), the option for ports should be added: - -```shell -docker run -p 1231:1231 dedocproject/dedoc:VERSION_TAG -``` - -For running example inside Dedoc container one should do the following: - -1. Open a new terminal window. Get a list of available containers: - ```shell - docker ps -a - ``` - The Dedoc container has `IMAGE` field equal `dedocproject/dedoc:VERSION_TAG` (or a custom name if you have another container). - Copy its `CONTAINER_ID` for the future commands. - -2. Start the container if it isn't running: - ```shell - docker start CONTAINER_ID - ``` - In this case a new terminal window in needed for the next steps. - -3. Run a command line of the Dedoc container: - ```shell - docker exec -it CONTAINER_ID bash - ``` - -4. Get examples from `https://github.com` (use `VERSION_TAG` described before) if you don't have them: - ```shell - git clone https://github.com/ispras/dedoc - git checkout VERSION_TAG - cd dedoc/examples - ``` -5. Run the example with API usage: - ```shell - python3 example_post.py - ``` - -6. Exit from the Dedoc container using `exit` command. 
diff --git a/examples/__init__.py b/examples/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/examples/create_structured_document.py b/examples/create_structured_document.py deleted file mode 100644 index dced3ae5..00000000 --- a/examples/create_structured_document.py +++ /dev/null @@ -1,10 +0,0 @@ -# noqa -from create_unstructured_document import unstructured_document -from dedoc.structure_constructors import TreeConstructor - -# to create structured document you can use TreeConstructor and apply it to unstructured document -# in this example we'll use unstructured_document from create_unstructured_document.py -structure_constructor = TreeConstructor() -parsed_document = structure_constructor.construct(document=unstructured_document) - -print(parsed_document.to_api_schema().model_dump()) diff --git a/examples/create_unstructured_document.py b/examples/create_unstructured_document.py deleted file mode 100644 index f9a026c9..00000000 --- a/examples/create_unstructured_document.py +++ /dev/null @@ -1,62 +0,0 @@ -# noqa -# in this example we create UnstructuredDocument, let's construct document corresponding to example.docx -from dedoc.data_structures import LineMetadata, Table, UnstructuredDocument -from dedoc.data_structures import TableMetadata -from dedoc.data_structures import LineWithMeta - -# First of all let's create some table, table consists of cells (list of rows, and row is a list of cells with metadata) -from dedoc.data_structures import HierarchyLevel -from dedoc.data_structures.cell_with_meta import CellWithMeta -from dedoc.metadata_extractors import BaseMetadataExtractor - - -table_cells = [ - ["N", "Second name", "Name", "Organization", "Phone", "Notes"], - ["1", "Ivanov", "Ivan", "ISP RAS", "8-800"], -] -cells_with_meta = [[CellWithMeta(lines=[LineWithMeta(line=cell_text, - metadata=LineMetadata(page_id=0, line_id=None), - annotations=[])]) for cell_text in row] for row in table_cells] -# table also has some metadata, let's 
assume that our table is on the first page -table_metadata = TableMetadata(page_id=0, uid="table 1") - -# let's build table -table = Table(cells=cells_with_meta, metadata=table_metadata) - -# Documents also contain some text. -# Logical structure of document may be represented by tree (see example_tree.png) -# but unstructured document consists of flat list of lines with text and metadata -# hierarchy structure hidden in HierarchyLevel attribute of LineWithMeta -# let's build first line, it is document tree root: - -# hierarchy level defines position of this line in a document tree. - -hierarchy_level = HierarchyLevel( - # most important parameters of HierarchyLevel is level_1 and level_2 - # hierarchy level compares by tuple (level_1, level_2) lesser -> closer to the root of the tree - level_1=0, - level_2=0, - # can_be_multiline and line_type - some parts of the document (for example title) may take more than one line - # if can_be_multiline is true then several lines in a row with same level_1, level_2 and line_type will be merged in one tree node - can_be_multiline=True, - line_type="header" -) -text = "DOCUMENT TITLE" -metadata = LineMetadata(page_id=0, line_id=1, tag_hierarchy_level=None, hierarchy_level=hierarchy_level, other_fields=None) - -# Annotations: one may specify some information about some part of the text, for example that some word written in italic font. -annotations = [] - -line1 = LineWithMeta(line=text, metadata=metadata, annotations=annotations) - -unstructured_document = UnstructuredDocument(tables=[table], lines=[line1], attachments=[]) - -# I hope you understand some concepts of the LineWithMeta, but you may ask why it need level_1 and level_2 -# parameters. Why is only level_1 not enough. Imagine that we have lists like these: -# 1. 1.1. 1.2.1.1. and so on, It may be difficult to restrict the length of the list with -# some pre-set number, so you may define -# HierarchyLevel(1, 1) for 1. -# HierarchyLevel(1, 2) for 1.1. 
-# HierarchyLevel(1, 4) for 1.2.1.1. and so on -metadata = BaseMetadataExtractor().extract(file_path="example.docx", converted_filename="example.doc", original_filename="example.docx") -unstructured_document.metadata = metadata diff --git a/examples/example.docx b/examples/example.docx deleted file mode 100644 index a8621aa3..00000000 Binary files a/examples/example.docx and /dev/null differ diff --git a/examples/example.jpg b/examples/example.jpg deleted file mode 100644 index afd13dc2..00000000 Binary files a/examples/example.jpg and /dev/null differ diff --git a/examples/example_doc_parser.py b/examples/example_doc_parser.py deleted file mode 100644 index bf9f059d..00000000 --- a/examples/example_doc_parser.py +++ /dev/null @@ -1,34 +0,0 @@ -# noqa -from dedoc.config import get_config -from dedoc.readers.docx_reader.docx_reader import DocxReader - -# create docx reader -docx_reader = DocxReader(config=get_config()) -# and read file example.docx -file_name = "example.docx" - -# we get unstructured file with lines and tables -unstructured_document = docx_reader.read(file_path=file_name) - -# let's look at the content of unstructured_file, it consists of tables and lines -print(unstructured_document.tables, unstructured_document.lines) - -# first of all let's look at the table -table = unstructured_document.tables[0] -# table consists of cells (we assume that table is rectangle) -# cell is a list of rows and row is a list of cells with metadata -for row in table.cells: - for cell in row: - print(cell.get_text().replace("\n", "\t") + " ", end="") - print("\n") - -# there is also some metadata in the table -print(table.metadata) - -# and now let's look at lines. 
lines is a list of objects of class LineWithMeta -lines = unstructured_document.lines -# let's look at the first line -line = lines[0] -print(line) -# line consists of line (text), metadata, hierarchy level -print(line.line, line.metadata, line.metadata.hierarchy_level) diff --git a/examples/example_img_parser.py b/examples/example_img_parser.py deleted file mode 100644 index 009e2708..00000000 --- a/examples/example_img_parser.py +++ /dev/null @@ -1,31 +0,0 @@ -# noqa -from dedoc.config import get_config -from dedoc.readers.pdf_reader.pdf_image_reader.pdf_image_reader import PdfImageReader - -# create img reader -# pdf reader can parse image-like formats -img_reader = PdfImageReader(config=get_config()) -# and read file example.docx -file_name = "example.jpg" - -# we get unstructured file with lines and tables -unstructured_document = img_reader.read(file_path=file_name) - -# let's look at the content of unstructured_file, it consists of tables and lines -print(unstructured_document.tables, unstructured_document.lines) - -# first of all let's look at the table -table = unstructured_document.tables[0] -# table consists of cells (we assume that table is rectangle) -# so cells is a list of rows and row is a list of cells with metadata -print(table.cells) -# there is also some metadata in the table -print(table.metadata) - -# and now let's look at lines. 
lines is a list of objects of class LineWithMeta -lines = unstructured_document.lines -# let's look at the first line -line = lines[0] -print(line) -# line consists of line (text), metadata, hierarchy level -print(line.line, line.metadata, line.metadata.hierarchy_level) diff --git a/examples/example_manager_input.py b/examples/example_manager_input.py deleted file mode 100644 index e43fd0ac..00000000 --- a/examples/example_manager_input.py +++ /dev/null @@ -1,34 +0,0 @@ -# noqa -import json - -from dedoc import DedocManager - -manager = DedocManager() - -filename_docx = "example.docx" -parsed_docx_document = manager.parse(file_path=filename_docx, parameters={}) - -# save the result -with open("result_docx.json", "w") as outfile: - outfile.write(json.dumps(parsed_docx_document.to_api_schema().model_dump())) - -filename_jpg = "example.jpg" -parsed_jpg_document = manager.parse(file_path=filename_jpg, parameters={}) - -# save the result -with open("result_jpg.json", "w") as outfile: - outfile.write(json.dumps(parsed_jpg_document.to_api_schema().model_dump())) - -filename_pdf_no_text_layer = "example_without_text_layer.pdf" -parsed_pdf_no_text_layer_document = manager.parse(file_path=filename_pdf_no_text_layer, parameters={"pdf_with_text_layer": "false"}) - -# save the result -with open("result_pdf_no_text_layer.json", "w") as outfile: - outfile.write(json.dumps(parsed_pdf_no_text_layer_document.to_api_schema().model_dump())) - -filename_pdf_with_text_layer = "example_with_text_layer.pdf" -parsed_pdf_with_text_layer_document = manager.parse(file_path=filename_pdf_with_text_layer, parameters={"pdf_with_text_layer": "true"}) - -# save the result -with open("result_pdf_with_text_layer.json", "w") as outfile: - outfile.write(json.dumps(parsed_pdf_with_text_layer_document.to_api_schema().model_dump())) diff --git a/examples/example_pdf_parser.py b/examples/example_pdf_parser.py deleted file mode 100644 index 7fe44a99..00000000 --- a/examples/example_pdf_parser.py +++ 
/dev/null @@ -1,60 +0,0 @@ -# noqa -from dedoc.config import get_config -from dedoc.readers.pdf_reader.pdf_txtlayer_reader.pdf_txtlayer_reader import PdfTxtlayerReader -from dedoc.readers.pdf_reader.pdf_image_reader.pdf_image_reader import PdfImageReader - -# create pdf text layer reader -pdf_txt_layer_reader = PdfTxtlayerReader(config=get_config()) -# and read file example.docx -file_name = "example_with_text_layer.pdf" - -# we get unstructured file with lines and tables -unstructured_document = pdf_txt_layer_reader.read(file_path=file_name) - -# let's look at the content of unstructured_file, it consists of tables and lines -print(unstructured_document.tables, unstructured_document.lines) - -# first of all let's look at the table -table = unstructured_document.tables[0] -# table consists of cells (we assume that table is rectangle) -# so cells is a list of rows and row is a list of cells with metadata -print(table.cells) -# there is also some metadata in the table -print(table.metadata) - -# and now let's look at lines. 
lines is a list of objects of class LineWithMeta -lines = unstructured_document.lines -# let's look at first line -line = lines[0] -print(line) -# line consists of line (text), metadata, hierarchy level -print(line.line, line.metadata, line.metadata.hierarchy_level) - - -# let's now look at the example of parsing pdf document without text layer -# create pdf reader -pdf_image_reader = PdfImageReader(config=get_config()) -# and read file example.docx -file_name = "example_without_text_layer.pdf" - -# we get unstructured file with lines and tables -unstructured_document = pdf_image_reader.read(file_path=file_name) - -# let's look at the content of unstructured_file, it consists of tables and lines -print(unstructured_document.tables, unstructured_document.lines) - -# first of all lets look at the table -table = unstructured_document.tables[0] -# table consists of cells (we assume that table is rectangle) -# so cells is list of rows and row is list of cells with metadata -print(table.cells) -# there is also some metadata in the table -print(table.metadata) - -# and now let's look at lines. 
lines is a list of objects of class LineWithMeta -lines = unstructured_document.lines -# let's look at first line -line = lines[0] -print(line) -# line consists of line (text), metadata, hierarchy level -print(line.line, line.metadata, line.metadata.hierarchy_level) diff --git a/examples/example_post.py b/examples/example_post.py deleted file mode 100644 index 1eaf3c61..00000000 --- a/examples/example_post.py +++ /dev/null @@ -1,22 +0,0 @@ -import json -import os -from pprint import pprint - -import requests - -# We want to parse file example.docx -# We will send it with requests lib -# To parse other document types just paste correct file name below -file_name = "example.docx" -file_path = os.path.abspath(file_name) - -with open(file_path, 'rb') as file: - # file we want to parse - files = {'file': (file_name, file)} - # dict with additional parameters - data = {"document_type": ""} - # and now we send post request with attached file and parameters. - r = requests.post("http://localhost:1231/upload", files=files, data=data) - # wait for response, parse json result and print it - result = json.loads(r.content.decode()) - pprint(result) diff --git a/examples/example_tree.png b/examples/example_tree.png deleted file mode 100644 index 963d2d02..00000000 Binary files a/examples/example_tree.png and /dev/null differ diff --git a/examples/example_with_text_layer.pdf b/examples/example_with_text_layer.pdf deleted file mode 100644 index 45b240c7..00000000 Binary files a/examples/example_with_text_layer.pdf and /dev/null differ diff --git a/examples/example_without_text_layer.pdf b/examples/example_without_text_layer.pdf deleted file mode 100644 index 2db9ff61..00000000 Binary files a/examples/example_without_text_layer.pdf and /dev/null differ diff --git a/labeling/Dockerfile b/labeling/Dockerfile new file mode 100644 index 00000000..25970c9f --- /dev/null +++ b/labeling/Dockerfile @@ -0,0 +1,23 @@ +ARG REPOSITORY="docker.io" +FROM 
dedocproject/dedoc_p3.9_base:version_2023_08_28 + +ENV PYTHONPATH "${PYTHONPATH}:/labeling_root:/labeling_root/labeling" +ENV RESOURCES_PATH "/labeling_root/resources" + +ADD requirements.txt . +RUN pip3 install --no-cache-dir -r requirements.txt +RUN pip3 install Jinja2==3.1.2 + +RUN mkdir /labeling_root +RUN mkdir /labeling_root/dedoc +ADD dedoc/config.py /labeling_root/dedoc/config.py +ADD dedoc/download_models.py /labeling_root/dedoc/download_models.py +RUN python3 /labeling_root/dedoc/download_models.py + +ADD dedoc /labeling_root/dedoc +ADD VERSION /labeling_root +RUN echo "__version__ = \"$(cat /labeling_root/VERSION)\"" > /labeling_root/dedoc/version.py + +ADD labeling /labeling_root/labeling + +CMD ["python3", "/labeling_root/labeling/train_dataset/main.py"] diff --git a/labeling/README.md b/labeling/README.md new file mode 100644 index 00000000..e5ba46e9 --- /dev/null +++ b/labeling/README.md @@ -0,0 +1,19 @@ +# Label data for dedoc classifiers + +To run labeling API, use the following command (from the project root): +```shell +docker-compose -f labeling/docker-compose.yml up --build +``` + +Or you can run API from `labeling` directory: +```shell +cd labeling +docker-compose up --build +``` + +By default, API is available on `localhost:1232`. + +To run tests, use the following command (from the project root): +```shell +test="true" docker-compose -f labeling/docker-compose.yml up --build --exit-code-from test +``` diff --git a/labeling/docker-compose.yml b/labeling/docker-compose.yml new file mode 100644 index 00000000..51bebc49 --- /dev/null +++ b/labeling/docker-compose.yml @@ -0,0 +1,30 @@ +version: '2.4' + +services: + labeling: + mem_limit: 16G + build: + context: .. + dockerfile: labeling/Dockerfile + restart: always + tty: true + ports: + - 1232:1232 + environment: + PORT: 1232 + + + test: + depends_on: + - labeling + build: + context: .. 
+ dockerfile: labeling/Dockerfile + tty: true + environment: + HOST: "labeling" + PORT: 1232 + is_test: $test + PYTHONPATH: $PYTHONPATH:/labeling_root/labeling/tests:/labeling_root/labeling:/labeling_root + command: + bash labeling_root/labeling/tests/run_tests_in_docker.sh diff --git a/resources/Arial_Narrow.ttf b/labeling/resources/Arial_Narrow.ttf similarity index 100% rename from resources/Arial_Narrow.ttf rename to labeling/resources/Arial_Narrow.ttf diff --git a/resources/train_dataset/diploma/config.json b/labeling/resources/diploma/config.json similarity index 100% rename from resources/train_dataset/diploma/config.json rename to labeling/resources/diploma/config.json diff --git a/resources/train_dataset/diploma/manifest.odt b/labeling/resources/diploma/manifest.odt similarity index 100% rename from resources/train_dataset/diploma/manifest.odt rename to labeling/resources/diploma/manifest.odt diff --git a/resources/train_dataset/diploma/manifest.pdf b/labeling/resources/diploma/manifest.pdf similarity index 100% rename from resources/train_dataset/diploma/manifest.pdf rename to labeling/resources/diploma/manifest.pdf diff --git a/resources/train_dataset/formInput.html b/labeling/resources/formInput.html similarity index 63% rename from resources/train_dataset/formInput.html rename to labeling/resources/formInput.html index 997ed0fb..c05cb50c 100644 --- a/resources/train_dataset/formInput.html +++ b/labeling/resources/formInput.html @@ -1,8 +1,8 @@ - + - Задания для разметки + Tasks for annotation
@@ -10,9 +10,9 @@
-

Как Вас зовут: +

Name of the annotator:

-
+
@@ -21,8 +21,8 @@
-

Отдать результаты

-

Получить результаты

+

Upload results

+

Get results

left = {tasks_left}

diff --git a/resources/train_dataset/formResult.html b/labeling/resources/formResult.html similarity index 60% rename from resources/train_dataset/formResult.html rename to labeling/resources/formResult.html index 4c1a4d73..cf414afc 100644 --- a/resources/train_dataset/formResult.html +++ b/labeling/resources/formResult.html @@ -1,5 +1,5 @@ - + @@ -8,8 +8,8 @@
-
+ data-buttonText="Choose the file" id="select-file-box">
+
diff --git a/resources/train_dataset/header/config.json b/labeling/resources/header/config.json similarity index 100% rename from resources/train_dataset/header/config.json rename to labeling/resources/header/config.json diff --git a/resources/train_dataset/header/manifest.odt b/labeling/resources/header/manifest.odt similarity index 100% rename from resources/train_dataset/header/manifest.odt rename to labeling/resources/header/manifest.odt diff --git a/resources/train_dataset/header/manifest.pdf b/labeling/resources/header/manifest.pdf similarity index 100% rename from resources/train_dataset/header/manifest.pdf rename to labeling/resources/header/manifest.pdf diff --git a/resources/train_dataset/img_classifier_dockerfile/Dockerfile b/labeling/resources/img_classifier_dockerfile/Dockerfile similarity index 100% rename from resources/train_dataset/img_classifier_dockerfile/Dockerfile rename to labeling/resources/img_classifier_dockerfile/Dockerfile diff --git a/labeling/resources/img_classifier_dockerfile/README.md b/labeling/resources/img_classifier_dockerfile/README.md new file mode 100644 index 00000000..8dd6b70b --- /dev/null +++ b/labeling/resources/img_classifier_dockerfile/README.md @@ -0,0 +1,21 @@ +Suppose that: + +* You unpacked the archive with the task (where this file is located) + +* You are in the task directory (that appeared after the task unpacking) + +* You have Docker installed on your computer + +* You have the Internet connection + +Read the manifest file (manifest.pdf) + +Build the container for labeling and run it + +```bash +docker build -t labeling . + +docker run -ti --rm -p 5555:5555 labeling +``` + +Open localhost:5555 in your browser. 
\ No newline at end of file diff --git a/labeling/resources/img_classifier_dockerfile/run.sh b/labeling/resources/img_classifier_dockerfile/run.sh new file mode 100644 index 00000000..4c7813eb --- /dev/null +++ b/labeling/resources/img_classifier_dockerfile/run.sh @@ -0,0 +1,20 @@ +# Suppose that: +# +# * You unpacked the archive with the task (where this file is located) +# +# * You are in the task directory (that appeared after the task unpacking) +# +# * You have Docker installed on your computer +# +# * You have the Internet connection +# +# Read the manifest file (manifest.pdf) +# +# Build the container for labeling and run it + +docker build -t labeling . + +docker run -ti --rm -p 5555:5555 labeling + +# +# Open localhost:5555 in your browser. \ No newline at end of file diff --git a/resources/train_dataset/law/config.json b/labeling/resources/law/config.json similarity index 100% rename from resources/train_dataset/law/config.json rename to labeling/resources/law/config.json diff --git a/resources/train_dataset/law/manifest.odt b/labeling/resources/law/manifest.odt similarity index 100% rename from resources/train_dataset/law/manifest.odt rename to labeling/resources/law/manifest.odt diff --git a/resources/train_dataset/law/manifest.pdf b/labeling/resources/law/manifest.pdf similarity index 100% rename from resources/train_dataset/law/manifest.pdf rename to labeling/resources/law/manifest.pdf diff --git a/resources/train_dataset/paragraph/config.json b/labeling/resources/paragraph/config.json similarity index 100% rename from resources/train_dataset/paragraph/config.json rename to labeling/resources/paragraph/config.json diff --git a/resources/train_dataset/paragraph/manifest.odt b/labeling/resources/paragraph/manifest.odt similarity index 100% rename from resources/train_dataset/paragraph/manifest.odt rename to labeling/resources/paragraph/manifest.odt diff --git a/resources/train_dataset/paragraph/manifest.pdf b/labeling/resources/paragraph/manifest.pdf 
similarity index 100% rename from resources/train_dataset/paragraph/manifest.pdf rename to labeling/resources/paragraph/manifest.pdf diff --git a/resources/train_dataset/tables/config.json b/labeling/resources/tables/config.json similarity index 100% rename from resources/train_dataset/tables/config.json rename to labeling/resources/tables/config.json diff --git a/resources/train_dataset/tables/manifest.odt b/labeling/resources/tables/manifest.odt similarity index 100% rename from resources/train_dataset/tables/manifest.odt rename to labeling/resources/tables/manifest.odt diff --git a/resources/train_dataset/tables/manifest.pdf b/labeling/resources/tables/manifest.pdf similarity index 100% rename from resources/train_dataset/tables/manifest.pdf rename to labeling/resources/tables/manifest.pdf diff --git a/resources/train_dataset/tz/config.json b/labeling/resources/tz/config.json similarity index 100% rename from resources/train_dataset/tz/config.json rename to labeling/resources/tz/config.json diff --git a/resources/train_dataset/tz/manifest.odt b/labeling/resources/tz/manifest.odt similarity index 100% rename from resources/train_dataset/tz/manifest.odt rename to labeling/resources/tz/manifest.odt diff --git a/resources/train_dataset/tz/manifest.pdf b/labeling/resources/tz/manifest.pdf similarity index 100% rename from resources/train_dataset/tz/manifest.pdf rename to labeling/resources/tz/manifest.pdf diff --git a/labeling/tests/data/archive.zip b/labeling/tests/data/archive.zip new file mode 100644 index 00000000..8b2d2cd9 Binary files /dev/null and b/labeling/tests/data/archive.zip differ diff --git a/labeling/tests/data/diploma_archive.zip b/labeling/tests/data/diploma_archive.zip new file mode 100644 index 00000000..a7ee0700 Binary files /dev/null and b/labeling/tests/data/diploma_archive.zip differ diff --git a/tests/data/images_creator/english_doc.docx b/labeling/tests/data/images_creators/english_doc.docx similarity index 100% rename from 
tests/data/images_creator/english_doc.docx rename to labeling/tests/data/images_creators/english_doc.docx diff --git a/labeling/tests/data/images_creators/english_doc.pdf b/labeling/tests/data/images_creators/english_doc.pdf new file mode 100644 index 00000000..7ae0b98d Binary files /dev/null and b/labeling/tests/data/images_creators/english_doc.pdf differ diff --git a/labeling/tests/data/images_creators/law_image.png b/labeling/tests/data/images_creators/law_image.png new file mode 100644 index 00000000..4f278e1c Binary files /dev/null and b/labeling/tests/data/images_creators/law_image.png differ diff --git a/tests/data/images_creator/txt_example.txt b/labeling/tests/data/images_creators/txt_example.txt similarity index 100% rename from tests/data/images_creator/txt_example.txt rename to labeling/tests/data/images_creators/txt_example.txt diff --git a/tests/data/laws/law_classifier_000000_Bhw.json b/labeling/tests/data/laws/law_classifier_000000_Bhw.json similarity index 100% rename from tests/data/laws/law_classifier_000000_Bhw.json rename to labeling/tests/data/laws/law_classifier_000000_Bhw.json diff --git "a/labeling/tests/data/laws/\320\272\320\276\320\260\320\277_\320\274\320\276\321\201\320\272\320\262\321\213_8_7_2015_utf.txt" "b/labeling/tests/data/laws/\320\272\320\276\320\260\320\277_\320\274\320\276\321\201\320\272\320\262\321\213_8_7_2015_utf.txt" new file mode 100644 index 00000000..a8cce83d --- /dev/null +++ "b/labeling/tests/data/laws/\320\272\320\276\320\260\320\277_\320\274\320\276\321\201\320\272\320\262\321\213_8_7_2015_utf.txt" @@ -0,0 +1,4730 @@ + + + + + З А К О Н + + ГОРОДА МОСКВЫ + + от 21 ноября 2007 г. 
№ 45 + + Кодекс города Москвы об административных правонарушениях + + (В редакции законов Москвы от 10.12.2008 № 64, от 20.05.2009 № 12, от +03.06.2009 № 17, от 07.10.2009 № 42, от 10.03.2010 № 6, от 14.04.2010 № 11, от +23.06.2010 № 28, от 13.10.2010 № 40, от 01.06.2011 № 21, от 26.10.2011 № 47, от +23.11.2011 № 55, от 23.11.2011 № 56, от 14.12.2011 № 64, от 17.10.2012 № 50, от +24.10.2012 № 51, от 31.10.2012 № 54, от 12.12.2012 № 65, от 16.01.2013 № 1, от +13.03.2013 № 11, от 27.03.2013 № 12, от 03.04.2013 № 13, от 24.04.2013 № 19, от +26.06.2013 № 35, от 26.06.2013 № 36, от 20.11.2013 № 65, от 22.01.2014 № 2, от +07.05.2014 № 24, от 04.06.2014 № 30, от 18.06.2014 № 31, от 17.12.2014 № 61, от +21.01.2015 № 1, от 13.05.2015 № 25, от 13.05.2015 № 26, от 27.05.2015 № 28, от +24.06.2015 № 32, от 24.06.2015 № 38, от 24.06.2015 № 39, от 08.07.2015 № 43, от +08.07.2015 № 46, от 09.09.2015 № 49, от 09.12.2015 № 68, от 27.01.2016 № 3, от +23.03.2016 № 13, от 25.05.2016 № 26, от 23.11.2016 № 39, от 20.09.2017 № 35, от +24.01.2018 № 3, от 19.12.2018 № 32, от 26.12.2018 № 40, от 30.01.2019 № 4, от +29.04.2019 № 15, от 22.05.2019 № 20, от 11.12.2019 № 34, от 01.04.2020 № 6) + + Закон города Москвы "Кодекс города Москвы об административных +правонарушениях" (далее - Кодекс) в соответствии с Конституцией Российской +Федерации, Кодексом Российской Федерации об административных правонарушениях, +Уставом города Москвы устанавливает административную ответственность по +вопросам, не отнесенным Кодексом Российской Федерации об административных +правонарушениях к ведению Российской Федерации, в том числе за нарушение норм и +правил, предусмотренных законами и иными нормативными правовыми актами города +Москвы, нормативными правовыми актами органов местного самоуправления в городе +Москве, определяет органы и перечень должностных лиц, уполномоченных составлять +протоколы и рассматривать дела об административных правонарушениях, +предусмотренных настоящим Кодексом, а также определяет 
перечень должностных лиц, +уполномоченных составлять протоколы об административных правонарушениях в +случаях, предусмотренных Кодексом Российской Федерации об административных +правонарушениях. (В редакции Закона Москвы от 24.06.2015 г. № 39) + + + + + Статья 1.1. Законодательство города Москвы об административных +правонарушениях + + 1. Законодательство города Москвы об административных правонарушениях +состоит из настоящего Кодекса. + + 2. Нормы, устанавливающие административную ответственность по вопросам, не +отнесенным Кодексом Российской Федерации об административных правонарушениях к +ведению Российской Федерации, не могут содержаться в иных законах города Москвы +и подлежат включению в настоящий Кодекс. + + Статья 1.2. Виды административных наказаний + + 1. За совершение административных правонарушений, предусмотренных настоящим +Кодексом, могут устанавливаться и применяться в отношении граждан, должностных +лиц и юридических лиц следующие административные наказания: + + 1) предупреждение; + + 2) административный штраф. + + 2. Совершившие административные правонарушения в связи с выполнением +организационно-распорядительных или административно-хозяйственных функций +руководители и другие работники организаций, не являющихся государственными и +муниципальными, а также лица, осуществляющие предпринимательскую деятельность +без образования юридического лица (далее - индивидуальные предприниматели), +несут административную ответственность как должностные лица, если настоящим +Кодексом не установлено иное. + + Статья 1.3. Порядок зачисления административных штрафов + + Суммы административных штрафов за административные правонарушения, +предусмотренные настоящим Кодексом, зачисляются в бюджет города Москвы либо в +бюджеты внутригородских муниципальных образований в городе Москве в порядке, +установленном законом города Москвы о бюджете города Москвы на соответствующий +финансовый год. + + Глава 2. 
Административные правонарушения, посягающие на права граждан и +здоровье населения + + Статья 2.1. Неправомерный отказ, уклонение от рассмотрения либо нарушение +порядка и сроков рассмотрения обращений граждан + + (Утратила силу - Закон Москвы от 07.10.2009 г. № 42) + + Статья 2.2. Нарушение установленного порядка квотирования рабочих мест + + Невыполнение работодателем установленной законодательством города Москвы +обязанности по созданию или выделению квотируемых рабочих мест - + + влечет наложение административного штрафа на должностных лиц в размере от +трех тысяч до пяти тысяч рублей; на юридических лиц - от тридцати тысяч до +пятидесяти тысяч рублей. + + Статья 2.3. Невыполнение требований по обеспечению доступа инвалидов к +объектам социальной, транспортной и инженерной инфраструктур + + (Утратила силу - Закон Москвы от 12.12.2012 г. № 65) + + Статья 2.4. Нарушение установленных Правительством Москвы норм и правил в +сфере опеки и попечительства + + Нарушение установленных Правительством Москвы норм и правил в сфере опеки и +попечительства - + + влечет предупреждение или наложение административного штрафа на граждан в +размере от пятисот до одной тысячи рублей; на должностных лиц - от одной тысячи +до десяти тысяч рублей; на юридических лиц - от тридцати тысяч до пятидесяти +тысяч рублей. + + (Статья в редакции Закона Москвы от 07.10.2009 г. № 42) + + Статья 2.5. Неисполнение постановлений и представлений комиссий по делам +несовершеннолетних и защите их прав + + Неисполнение либо создание препятствий для исполнения постановления или +представления комиссии по делам несовершеннолетних и защите их прав, принятого в +соответствии с ее компетенцией, - + + влечет предупреждение или наложение административного штрафа на граждан в +размере от пятисот до одной тысячи рублей; на должностных лиц - от одной тысячи +до трех тысяч рублей. + + Статья 2.6. 
Отказ в приеме граждан на обучение и исключение их из +образовательных учреждений и организаций + + (Утратила силу - Закон Москвы от 12.12.2012 г. № 65) + + Статья 2.7. Нарушение прав граждан на получение общего образования + + (Утратила силу - Закон Москвы от 12.12.2012 г. № 65) + + Статья 2.8. Воспрепятствование деятельности членов комиссии по делам +несовершеннолетних и защите их прав + + Воспрепятствование посещению учреждения системы профилактики безнадзорности +и правонарушений несовершеннолетних членом комиссии по делам несовершеннолетних +и защите их прав, осуществляющим по поручению данной комиссии проверку условий +содержания, воспитания и обучения несовершеннолетних в указанном учреждении, - + + влечет наложение административного штрафа на граждан в размере от пятисот +до одной тысячи рублей; на должностных лиц - от четырех тысяч до пяти тысяч +рублей; на юридических лиц - от тридцати тысяч до пятидесяти тысяч рублей. + + Статья 2.9. Невыполнение обязанности по выделению бесплатных билетов для +организации досуга несовершеннолетних + + Невыполнение учреждением культуры, физической культуры и спорта, получающим +денежные средства из бюджета города Москвы или целевых бюджетных фондов развития +территорий административных округов и районов города Москвы, обязанности по +выделению для детей из неблагополучных семей установленного правовыми актами +города Москвы количества реализуемых (распространяемых) указанным учреждением +билетов (абонементов) - + + влечет наложение административного штрафа на должностных лиц в размере от +одной тысячи до двух тысяч рублей; на юридических лиц - от десяти тысяч до +двадцати тысяч рублей. + + Статья 2.10. 
Нарушение порядка льготного обеспечения лекарственными +средствами и изделиями медицинского назначения + + Нарушение установленного Правительством Москвы порядка обеспечения +отдельных категорий жителей города Москвы лекарственными средствами и изделиями +медицинского назначения, отпускаемыми по рецептам врачей бесплатно или с +50-процентной скидкой, - + + влечет наложение административного штрафа на должностных лиц в размере от +пятисот до двух тысяч рублей; на юридических лиц - от двух тысяч до пяти тысяч +рублей. + + Статья 2.10.1. Вовлечение несовершеннолетних в процесс использования +устройств, имитирующих курение табака + + 1. Вовлечение несовершеннолетних в процесс использования устройств, +имитирующих курение табака, - + + влечет наложение административного штрафа на граждан в размере от одной +тысячи пятисот до трех тысяч рублей; на должностных лиц в размере от двух тысяч +пятисот до пяти тысяч рублей. + + 2. Те же действия, совершенные родителями (законными представителями) +несовершеннолетних, а также лицами, на которых возложены обязанности по обучению +и воспитанию несовершеннолетних, - + + влекут наложение административного штрафа в размере от четырех тысяч до +пяти тысяч рублей. + + Примечание. Под использованием устройств, имитирующих курение табака, в +настоящем Кодексе понимается использование электронных систем доставки никотина +либо других веществ, содержащихся в жидкостях или иных субстанциях для указанных +электронных систем, кальянов, в том числе электронных, а также устройств для +нагревания табака и иных аналогичных устройств, имитирующих курение табака. + + (Статья дополнена - Закон Москвы от 30.01.2019 № 4) + + Статья 2.11. 
Нарушение порядка предоставления льгот по оплате ветеринарных +услуг + + Нарушение установленного Правительством Москвы порядка предоставления +отдельным категориям жителей города Москвы льгот по оплате ветеринарных услуг - + + влечет наложение административного штрафа на должностных лиц в размере от +пятисот до одной тысячи рублей; на юридических лиц - от одной тысячи до двух +тысяч рублей. + + Статья 2.12. Нарушение административного регламента предоставления +государственной услуги города Москвы + + 1. Нарушение административного регламента предоставления государственной +услуги города Москвы, выразившееся в требовании о необходимости представления +заявителем документов, не входящих в перечень документов, подлежащих +представлению заявителем согласно соответствующему административному регламенту, +за исключением случаев, когда нормативное правовое регулирование отношений, +возникающих в связи с предоставлением данной государственной услуги, +осуществляется нормативными правовыми актами Российской Федерации, а равно в +отказе в приеме у заявителя документов, необходимых для предоставления +государственной услуги, по основаниям, не предусмотренным соответствующим +административным регламентом, - + + влечет наложение административного штрафа на должностных лиц органов +исполнительной власти города Москвы в размере от пяти тысяч до десяти тысяч +рублей; на работников казенных, автономных учреждений города Москвы и +многофункциональных центров предоставления государственных услуг - от одной +тысячи пятисот до трех тысяч рублей. + + 2. 
Нарушение административного регламента предоставления государственной +услуги города Москвы, выразившееся в приостановлении предоставления +государственной услуги города Москвы по основаниям, не предусмотренным +соответствующим административным регламентом, - + + влечет наложение административного штрафа на должностных лиц органов +исполнительной власти города Москвы в размере от трех тысяч до пяти тысяч +рублей; на работников казенных и автономных учреждений города Москвы - от одной +тысячи до одной тысячи пятисот рублей. + + 3. Отказ в предоставлении государственной услуги по основаниям, не +предусмотренным административным регламентом предоставления государственной +услуги города Москвы, за исключением случаев, когда нормативное правовое +регулирование отношений, возникающих в связи с предоставлением данной +государственной услуги, осуществляется нормативными правовыми актами Российской +Федерации, - + + влечет наложение административного штрафа на должностных лиц органов +исполнительной власти города Москвы в размере от трех тысяч до пяти тысяч +рублей; на работников казенных и автономных учреждений города Москвы - от одной +тысячи до одной тысячи пятисот рублей. + + (Статья дополнена - Закон Москвы от 04.06.2014 г. № 30) + + Глава 3. Административные правонарушения, посягающие на общественный +порядок и общественную безопасность + + Статья 3.1. Нарушение установленного порядка организации и проведения +массовых мероприятий + + Нарушение организаторами массовых мероприятий установленного уполномоченным +органом исполнительной власти города Москвы, а равно согласованного с ним +порядка организации и проведения массовых мероприятий (за исключением +организации и проведения собраний, митингов, демонстраций, шествий и +пикетирования) - + + влечет наложение административного штрафа на граждан в размере от одной +тысячи пятисот до двух тысяч пятисот рублей; на юридических лиц - от тридцати +тысяч до пятидесяти тысяч рублей. 
+ + (Статья в редакции Закона Москвы от 07.10.2009 г. № 42) + + Статья 3.2. Нарушение правил поведения при посещении массовых мероприятий + + 1. Нарушение установленных уполномоченным органом исполнительной власти +города Москвы, а равно согласованных с ним правил поведения при посещении +культурно-зрелищных, спортивных и иных мероприятий в общественных местах - + + влечет предупреждение или наложение административного штрафа на граждан в +размере от ста до трехсот рублей. + + 2. Выбрасывание посторонних предметов на сцены, трибуны, футбольные поля, +хоккейные и другие спортивные площадки, акватории водных сооружений, беговые +дорожки, нахождение на них без разрешения уполномоченных лиц, а также совершение +иных действий, препятствующих проведению культурно-зрелищных, спортивных и иных +мероприятий в общественных местах, если эти деяния не содержат признаков +правонарушений, предусмотренных федеральным законодательством, - + + влечет наложение административного штрафа на граждан в размере от одной +тысячи пятисот до двух тысяч пятисот рублей. + + Статья 3.3. Воспрепятствование законной деятельности народного дружинника +или члена общественного пункта охраны порядка + + Неповиновение законному требованию народного дружинника или члена +общественного пункта охраны порядка при исполнении им обязанностей по охране +общественного порядка, а равно воспрепятствование его законной деятельности в +иной форме - + + влечет наложение административного штрафа на граждан в размере от пятисот +до одной тысячи пятисот рублей; на должностных лиц - от одной тысячи до двух +тысяч рублей. + + Статья 3.4. Использование пиротехнических средств в общественных местах + + (Утратила силу - Закон Москвы от 12.12.2012 г. № 65) + + Статья 3.5. Нарушение установленных нормативными правовыми актами города +Москвы требований к устройству фейерверков в городе Москве + + 1. 
Устройство негосударственными организациями фейерверков, нарушающих +тишину и покой жителей города Москвы с 23 часов до 7 часов, за исключением +случаев, установленных нормативными правовыми актами города Москвы, - + + влечет наложение административного штрафа на должностных лиц в размере от +трех тысяч до четырех тысяч рублей; на юридических лиц - от пяти тысяч до шести +тысяч рублей. + + 2. Устройство гражданами фейерверков, нарушающих тишину и покой жителей +города Москвы с 23 часов до 7 часов, за исключением случаев, установленных +нормативными правовыми актами города Москвы, - + + влечет наложение административного штрафа на граждан в размере от одной +тысячи пятисот до двух тысяч рублей. + + (Статья в редакции Закона Москвы от 12.12.2012 г. № 65) + + Статья 3.6. Нарушение правил охраны жизни людей на воде + + 1. Нарушение установленных Правительством Москвы правил охраны жизни людей +на воде в городе Москве - + + влечет предупреждение или наложение административного штрафа на граждан в +размере от двух тысяч до двух тысяч пятисот рублей; на должностных лиц - от +четырех тысяч до пяти тысяч рублей; на юридических лиц - от сорока тысяч до +пятидесяти тысяч рублей. + + 2. Невыполнение судоводителем или иным лицом требований или ограничений, +установленных правилами пользования водными объектами для плавания на маломерных +судах в городе Москве, - + + влечет предупреждение или наложение административного штрафа на граждан в +размере от пятисот до одной тысячи рублей. + + Статья 3.7. Нарушение порядка использования территории Московского +метрополитена и 25-метровой зоны от наземных вестибюлей станций и сооружений +Московского метрополитена + + (Утратила силу - Закон Москвы от 24.01.2018 г. № 3) + + Статья 3.8. 
Приставание к гражданам в общественных местах + + Приставание к гражданам в общественных местах, то есть нарушение +общественного порядка, выразившееся в навязчивых действиях гражданина, +осуществляемых в отношении других граждан против их воли, в целях купли-продажи, +обмена или приобретения вещей иным способом, а также в целях гадания, +попрошайничества (за исключением случаев, предусмотренных частью 4 статьи 10.9 +настоящего Кодекса), оказания услуг сексуального характера либо навязывания иных +услуг в общественных местах - (В редакции Закона Москвы от 13.05.2015 г. № 25) + + влечет предупреждение или наложение административного штрафа на граждан в +размере от ста до пятисот рублей. + + Статья 3.9. Гадание в общественных местах + + Гадание за деньги, вещи и иные ценности на улицах или в других общественных +местах - + + влечет предупреждение или наложение административного штрафа на граждан в +размере от ста до пятисот рублей. + + Статья 3.10. Игра в карты или иные азартные игры на территориях общего +пользования + + Игра в карты или иные азартные игры на деньги, вещи и иные ценности на +территориях общего пользования (в том числе на площадях, улицах, проездах, +набережных, береговых полосах водных объектов общего пользования, скверах, +бульварах), предусмотренных Градостроительным кодексом Российской Федерации, - + + влечет предупреждение или наложение административного штрафа на граждан в +размере от ста до пятисот рублей. + + (Статья в редакции Закона Москвы от 12.12.2012 г. № 65) + + Статья 3.11. Нарушение возрастных ограничений при демонстрации +аудиовизуальных произведений + + 1. Публичная демонстрация аудиовизуальных произведений, для просмотра +которых установлены возрастные ограничения, при отсутствии на афишах информации +об указанных возрастных ограничениях - + + влечет наложение административного штрафа на должностных лиц в размере от +одной тысячи до двух тысяч рублей; на юридических лиц - от двух тысяч до трех +тысяч рублей. + + 2. 
Публичная демонстрация рекламных блоков аудиовизуальных произведений, +для просмотра которых установлены возрастные ограничения, во время демонстрации +аудиовизуальных произведений, для просмотра которых возрастные ограничения не +установлены, - + + влечет наложение административного штрафа на должностных лиц в размере от +одной тысячи до двух тысяч рублей; на юридических лиц - от двух тысяч до трех +тысяч рублей. + + 3. Допуск на просмотр аудиовизуальных произведений лиц с нарушением +установленных для них возрастных ограничений - + + влечет наложение административного штрафа на должностных лиц в размере от +одной тысячи пятисот до двух тысяч рублей; на юридических лиц - от четырех тысяч +до пяти тысяч рублей. + + Статья 3.12. Непринятие мер по недопущению нахождения несовершеннолетних в +общественных и иных местах без сопровождения родителей (лиц, их заменяющих) или +лиц, осуществляющих мероприятия с участием несовершеннолетних + + 1. Непринятие мер по недопущению нахождения несовершеннолетних, не +достигших возраста 18 лет, в местах, пребывание в которых может причинить вред +здоровью несовершеннолетних, их физическому, интеллектуальному, психическому, +духовному и нравственному развитию: на территориях, в помещениях, которые +предназначены для реализации товаров сексуального характера, в букмекерских +конторах и тотализаторах, - + + влечет наложение административного штрафа на должностных лиц в размере от +двух тысяч пятисот до пяти тысяч рублей; на юридических лиц - от десяти тысяч до +тридцати тысяч рублей. + + 2. 
Непринятие мер по недопущению нахождения в ночное время (с 23 часов до 6 +часов) несовершеннолетних, не достигших возраста 18 лет, без сопровождения +родителей (лиц, их заменяющих) или лиц, осуществляющих мероприятия с участием +несовершеннолетних, на объектах (на территориях, в помещениях) юридических лиц +или индивидуальных предпринимателей, которые предназначены для реализации услуг +в сфере общественного питания, для развлечений, досуга, где предусмотрены +розничная продажа и распитие пива и напитков, изготавливаемых на его основе, +алкогольной продукции, - + + влечет наложение административного штрафа на должностных лиц в размере от +двух тысяч пятисот до пяти тысяч рублей; на юридических лиц - от десяти тысяч до +тридцати тысяч рублей. + + 3. Непринятие мер по недопущению нахождения в ночное время (с 23 часов до 6 +часов) несовершеннолетних, не достигших возраста 16 лет, без сопровождения +родителей (лиц, их заменяющих) или лиц, осуществляющих мероприятия с участием +несовершеннолетних, на территориях, на которых ведется строительство, на +территориях автомагистралей, путепроводов, железнодорожных магистралей и полос +отвода железных дорог, нефте-, газо- и продуктопроводов, высоковольтных линий +электропередачи, трубопроводов, в парках, водоемах и на прилегающих к ним +территориях (береговая полоса), в помещениях общего пользования (на технических +этажах, чердаках, в подвалах) и на крышах жилых домов, на территориях, +прилегающих к образовательным учреждениям, в организациях, обеспечивающих доступ +к сети Интернет, - + + влечет предупреждение или наложение административного штрафа на родителей +(законных представителей), лиц, осуществляющих мероприятия с участием +несовершеннолетних, в размере от ста до пятисот рублей. + + (Статья в редакции Закона Москвы от 14.04.2010 г. № 11) + + Статья 3.13. 
Нарушение тишины и покоя граждан + + Совершение действий, нарушающих тишину и покой граждан с 23 часов до 7 +часов на установленных законодательством города Москвы защищаемых территориях и +в защищаемых помещениях, а равно совершение действий, нарушающих покой граждан и +тишину, при проведении переустройства и (или) перепланировки жилого помещения в +многоквартирном доме и (или) нежилого помещения, не являющегося общим имуществом +собственников помещений в многоквартирном доме, иных ремонтных работ в данных +помещениях с 19 часов до 9 часов и с 13 часов до 15 часов, а также в воскресенье +и нерабочие праздничные дни (кроме случаев, когда указанные работы +осуществляются в течение полутора лет со дня ввода многоквартирного дома в +эксплуатацию), за исключением действий, направленных на предотвращение +правонарушений, ликвидацию последствий аварий, стихийных бедствий, иных +чрезвычайных ситуаций, проведение неотложных работ, связанных с обеспечением +личной и общественной безопасности граждан в соответствии с законодательством +Российской Федерации, действий, совершаемых при отправлении религиозных культов +в рамках канонических требований соответствующих конфессий, случаев, +предусмотренных статьями 4.46 и 4.50 настоящего Кодекса, а также при проведении +культурно-массовых мероприятий, разрешенных органами государственной власти или +органами местного самоуправления в городе Москве, - (В редакции законов Москвы +от 17.12.2014 г. № 61; от 09.12.2015 г. № 68) + + влечет предупреждение или наложение административного штрафа на граждан в +размере от одной тысячи до двух тысяч рублей; на должностных лиц - от четырех +тысяч до восьми тысяч рублей; на юридических лиц - от сорока тысяч до +восьмидесяти тысяч рублей. (В редакции Закона Москвы от 07.10.2009 г. № 42) + + Статья 3.14. Непредставление организацией декларации безопасности + + (Утратила силу - Закон Москвы от 09.09.2015 г. № 49) + + Статья 3.15. 
Непроведение организацией уведомления о чрезвычайной ситуации + + Непроведение немедленного уведомления организацией, в состав которой входит +потенциально опасный объект, уполномоченного органа исполнительной власти города +Москвы в области защиты населения и территорий от чрезвычайных ситуаций, +руководителей территориальных органов исполнительной власти города Москвы, +органов местного самоуправления, организаций и населения, расположенных в +опасной зоне, о чрезвычайной ситуации (характере и масштабе, реальной угрозе +жизни и здоровью людей, материальным ценностям), возникшей на территории +указанной организации или на подведомственной ей территории, - + + влечет наложение административного штрафа на должностных лиц в размере от +четырех тысяч до пяти тысяч рублей; на юридических лиц - от десяти тысяч до +тридцати тысяч рублей. + + Статья 3.16. Непроведение организацией работ по прогнозированию +чрезвычайных ситуаций + + Непроведение организацией, в состав которой входит потенциально опасный +объект, работ по оценке риска и прогнозированию вероятности возникновения +чрезвычайных ситуаций - + + влечет наложение административного штрафа на должностных лиц в размере от +четырех тысяч до пяти тысяч рублей; на юридических лиц - от десяти тысяч до +тридцати тысяч рублей. + + Статья 3.17. Невыполнение организацией требований об обязательном +страховании рисков возникновения чрезвычайных ситуаций + + (Утратила силу - Закон Москвы от 09.09.2015 г. № 49) + + Статья 3.18. 
Нарушение установленных нормативными правовыми актами города +Москвы требований в области защиты населения и территорий города от чрезвычайных +ситуаций природного и техногенного характера, обеспечения безопасности людей на +водных объектах города Москвы + + Нарушение установленных нормативными правовыми актами города Москвы +требований в области защиты населения и территорий города от чрезвычайных +ситуаций природного и техногенного характера, обеспечения безопасности людей на +водных объектах города Москвы - + + влечет наложение административного штрафа на должностных лиц в размере от +четырех тысяч до пяти тысяч рублей. + + (Статья в редакции Закона Москвы от 12.12.2012 г. № 65) + + Статья 3.18.1. Нарушение требований нормативных правовых актов города +Москвы, направленных на введение и обеспечение режима повышенной готовности на +территории города Москвы + + 1. Неисполнение требований о временной приостановке проведения мероприятий +с очным присутствием граждан, а также работы объектов розничной торговли, +организаций (предприятий) общественного питания, оказания услуг с посещением +гражданами таких объектов, организаций (предприятий), если эти действия +(бездействие) не содержат уголовно наказуемого деяния или не влекут +административной ответственности в соответствии с Кодексом Российской Федерации +об административных правонарушениях, - + + влечет наложение административного штрафа на должностных лиц в размере от +тридцати тысяч до сорока тысяч рублей; на юридических лиц - от двухсот тысяч до +трехсот тысяч рублей. + + 2. 
Невыполнение гражданами требований нормативных правовых актов города +Москвы, направленных на введение и обеспечение режима повышенной готовности на +территории города Москвы, в том числе необеспечение режима самоизоляции, если +эти действия (бездействие) не содержат уголовно наказуемого деяния или не влекут +административной ответственности в соответствии с Кодексом Российской Федерации +об административных правонарушениях, - + + влечет наложение административного штрафа на граждан в размере четырех +тысяч рублей. + + 3. Повторное совершение административного правонарушения, предусмотренного +частями 1 и 2 настоящей статьи, - + + влечет наложение административного штрафа на граждан в размере пяти тысяч +рублей; на должностных лиц - от сорока тысяч до пятидесяти тысяч рублей; на +юридических лиц - от трехсот тысяч до пятисот тысяч рублей. + + 4. Совершение административного правонарушения, предусмотренного частью 2 +настоящей статьи, с использованием транспортного средства - + + влечет наложение административного штрафа на граждан в размере пяти тысяч +рублей. + + (Часть дополнена - Закон Москвы от 01.04.2020 № 6) + + Статья 3.19. Невыполнение собственником объекта требований по обеспечению +безопасного остекления + + Невыполнение собственником объекта с массовым пребыванием населения +требований по обеспечению безопасного остекления - + + влечет предупреждение или наложение административного штрафа на граждан в +размере от одной тысячи до трех тысяч рублей; на должностных лиц - от четырех +тысяч до пяти тысяч рублей; на юридических лиц - от десяти тысяч до тридцати +тысяч рублей. + + (Статья дополнена - Закон Москвы от 07.10.2009 г. № 42) + + Статья 3.20. Нарушение установленных Правительством Москвы правил пожарной +безопасности на природных и озелененных территориях, особо охраняемых зеленых +территориях города Москвы, особо охраняемых природных территориях регионального +значения в городе Москве + + 1. 
Разведение костров, проведение мероприятий, предусматривающих +использование открытого огня, использование мангалов и иных приспособлений для +тепловой обработки пищи с помощью открытого огня вне специально обустроенных +площадок на природных и озелененных территориях, особо охраняемых зеленых +территориях города Москвы, - + + влечет наложение административного штрафа на граждан в размере от трех +тысяч до четырех тысяч рублей; на должностных лиц - от двадцати тысяч до +тридцати тысяч рублей; на юридических лиц - от ста пятидесяти тысяч до двухсот +тысяч рублей. + + 2. Те же деяния, совершенные на особо охраняемых природных территориях +регионального значения в городе Москве, - + + влекут наложение административного штрафа на граждан в размере от четырех +тысяч до пяти тысяч рублей; на должностных лиц - от тридцати тысяч до сорока +тысяч рублей; на юридических лиц - от двухсот тысяч до трехсот тысяч рублей. + + (Статья дополнена - Закон Москвы от 22.01.2014 г. № 2) + + Глава 4. Административные правонарушения в области охраны окружающей среды +и природопользования + + Статья 4.1. Нарушение порядка ведения государственного кадастра особо +охраняемых природных территорий в городе Москве + + (Утратила силу - Закон Москвы от 12.12.2012 г. № 65) + + Статья 4.2. Нарушение режимов охраны и использования особо охраняемых +природных территорий регионального значения в городе Москве, а также их охранных +зон + + Нарушение установленных нормативными правовыми актами города Москвы режимов +охраны и использования особо охраняемых природных территорий регионального +значения в городе Москве, а также их охранных зон (за исключением случаев, +предусмотренных частью 2 статьи 3.20 настоящего Кодекса) - (В редакции Закона +Москвы от 22.01.2014 г. № 2) + + влечет наложение административного штрафа на граждан в размере от четырех +тысяч до пяти тысяч рублей; на должностных лиц - от тридцати тысяч до сорока +тысяч рублей; на юридических лиц - от двухсот тысяч до трехсот тысяч рублей. 
+ + (Статья в редакции Закона Москвы от 12.12.2012 г. № 65) + + Статья 4.3. Загрязнение вод в границах природного комплекса города Москвы + + Загрязнение, засорение, истощение поверхностных или подземных вод либо иное +изменение их природных свойств на территории природного комплекса города Москвы +- + + влечет наложение административного штрафа на граждан в размере от двух +тысяч пятисот до трех тысяч рублей; на должностных лиц - сорока тысяч рублей; на +юридических лиц - двухсот пятидесяти тысяч рублей. + + Статья 4.4. Нарушение экологических требований при эксплуатации очистных +сооружений и сетей ливневой канализации + + Нарушение экологических требований при эксплуатации очистных сооружений и +сетей ливневой канализации - + + влечет наложение административного штрафа на должностных лиц в размере +тридцати тысяч рублей; на юридических лиц - двухсот тысяч рублей. + + Статья 4.5. Сброс или поступление иным способом загрязняющих веществ на +рельеф местности + + Сброс или поступление иным способом загрязняющих веществ на рельеф +местности - + + влечет наложение административного штрафа на должностных лиц в размере +пятидесяти тысяч рублей; на юридических лиц - трехсот тысяч рублей. + + Статья 4.6. Нарушение порядка проведения работ по рекультивации +несанкционированных свалок в городе Москве + + Нарушение порядка проведения работ по рекультивации несанкционированных +свалок в городе Москве - + + влечет наложение административного штрафа на должностных лиц в размере +тридцати тысяч рублей; на юридических лиц - двухсот пятидесяти тысяч рублей. + + Статья 4.7. Сброс или размещение снега вне специально отведенных мест +хранения и удаления + + (Утратила силу - Закон Москвы от 13.03.2013 г. № 11) + + Статья 4.8. 
Нарушение правил применения материалов, используемых в качестве +противогололедных + + Нарушение правил применения материалов, используемых в качестве +противогололедных, включая превышение предельно допустимой нормы их расхода, +использование в неустановленных местах либо использование материалов, не +разрешенных к применению, - + + влечет наложение административного штрафа на граждан в размере от трех +тысяч до четырех тысяч рублей; на должностных лиц - сорока тысяч рублей; на +юридических лиц - трехсот тысяч рублей. + + Статья 4.9. Ввоз и использование почвогрунтов, не соответствующих +экологическим требованиям + + 1. Ввоз на территорию города Москвы почвогрунтов без документов, +предусмотренных правовыми актами города Москвы, - + + влечет наложение административного штрафа на граждан в размере от трех +тысяч до четырех тысяч рублей; на должностных лиц - сорока тысяч рублей; на +юридических лиц - трехсот тысяч рублей. + + 2. Использование на объектах благоустройства и озеленения почвогрунтов, не +соответствующих экологическим требованиям, установленным правовыми актами города +Москвы, - + + влечет наложение административного штрафа на должностных лиц в размере +сорока пяти тысяч рублей; на юридических лиц - трехсот пятидесяти тысяч рублей. + + Статья 4.10. Производство, оборот нефтепродуктов, не соответствующих +экологическим требованиям + + 1. Производство, оборот нефтепродуктов, не соответствующих экологическим +требованиям, установленным правовыми актами города Москвы, - + + влечет наложение административного штрафа на лиц, осуществляющих +предпринимательскую деятельность без образования юридического лица, в размере +пятидесяти тысяч рублей; на юридических лиц - четырехсот тысяч рублей. + + 2. Повторное совершение административного правонарушения, предусмотренного +частью 1 настоящей статьи, - + + влечет наложение административного штрафа на юридических лиц в размере +одного миллиона рублей. + + Примечание. 
Под оборотом нефтепродуктов следует понимать транспортировку, +прием, поставку, хранение, реализацию (продажу, заправку) нефтепродуктов. + + Статья 4.11. Нарушение экологических требований при эксплуатации +автозаправочных станций + + 1. (Утратила силу - Закон Москвы от 25.05.2016 г. № 26) + + 2. Нарушение требований к установкам (системам) для улавливания и +рекуперации паров моторного топлива на автозаправочных станциях - + + влечет наложение административного штрафа на должностных лиц в размере +двадцати пяти тысяч рублей; на юридических лиц - двухсот тысяч рублей. + + 3. (Утратила силу - Закон Москвы от 25.05.2016 г. № 26) + + Статья 4.12. Нарушение требований в области охраны окружающей среды при +осуществлении градостроительной и иных видов деятельности + + 1. Осуществление градостроительной и иных видов деятельности без +положительного заключения органа исполнительной власти города Москвы, +осуществляющего государственное управление в области охраны окружающей среды в +городе Москве, о соответствии экологическим требованиям предпроектной и +проектной документации - + + влечет наложение административного штрафа на должностных лиц в размере +пятидесяти тысяч рублей; на юридических лиц - трехсот тысяч рублей. + + 2. Осуществление градостроительной и иных видов деятельности, не +соответствующей документации, которая получила положительное заключение органа +исполнительной власти города Москвы, осуществляющего государственное управление +в области охраны окружающей среды в городе Москве, о соответствии экологическим +требованиям предпроектной и проектной документации - + + влечет наложение административного штрафа на должностных лиц в размере +сорока тысяч рублей; на юридических лиц - двухсот пятидесяти тысяч рублей. + + Статья 4.13. 
Пролив бетона (бетонного раствора, бетонных смесей) либо +просыпание сыпучих материалов во время их транспортировки + + Транспортировка бетона (бетонного раствора, бетонных смесей) либо сыпучих +материалов, повлекшая пролив бетона (бетонного раствора, бетонных смесей) либо +просыпание сыпучих материалов на дорогу, автомагистраль, тротуар, обочину, +примагистральную полосу газона, - + + влечет наложение административного штрафа на граждан в размере от трех +тысяч до четырех тысяч рублей; на должностных лиц - сорока тысяч рублей; на +юридических лиц - двухсот пятидесяти тысяч рублей. + + Статья 4.14. Нарушение правил обустройства строительных площадок пунктами +мойки колес автомобильного транспорта либо установками для сухой очистки колес +сжатым воздухом в зимний период + + (Утратила силу - Закон Москвы от 13.03.2013 г. № 11) + + Статья 4.15. Нарушение правил охраны окружающей среды при выводе из +эксплуатации предприятий, сооружений или иных объектов + + (Утратила силу - Закон Москвы от 25.05.2016 г. № 26) + + Статья 4.16. Невыполнение требований по проведению компенсационного +озеленения + + Невыполнение требований по проведению компенсационного озеленения - + + влечет наложение административного штрафа на должностных лиц в размере +тридцати тысяч рублей; на юридических лиц - двухсот тысяч рублей. + + Статья 4.17. Нарушение правил создания и содержания зеленых насаждений + + 1. Нарушение правил создания зеленых насаждений - + + влечет наложение административного штрафа на должностных лиц в размере от +сорока тысяч до пятидесяти тысяч рублей; на юридических лиц - трехсот тысяч +рублей. (В редакции Закона Москвы от 07.10.2009 г. № 42) + + 2. Нарушение правил содержания зеленых насаждений, за исключением случаев, +предусмотренных частью 3 статьи 8.10 настоящего Кодекса, - (В редакции Закона +Москвы от 13.05.2015 г. 
№ 26) + + влечет наложение административного штрафа на должностных лиц в размере от +сорока тысяч до пятидесяти тысяч рублей; на юридических лиц - трехсот пятидесяти +тысяч рублей. (В редакции Закона Москвы от 07.10.2009 г. № 42) + + 3. (Утратила силу - Закон Москвы от 13.05.2015 г. № 26) + + (Часть дополнена - Закон Москвы от 07.10.2009 г. № 42) + + Статья 4.18. Повреждение зеленых насаждений + + 1. Повреждение зеленых насаждений - + + влечет наложение административного штрафа на граждан в размере от трех +тысяч пятисот до четырех тысяч рублей; на должностных лиц - пятидесяти тысяч +рублей; на юридических лиц - трехсот тысяч рублей. + + 2. Те же действия, совершенные с применением механизмов, +автомототранспортных средств, самоходных машин и других видов техники, - + + влекут наложение административного штрафа на граждан в размере от четырех +тысяч до четырех тысяч пятисот рублей; на должностных лиц - пятидесяти тысяч +рублей; на юридических лиц - трехсот тысяч рублей. + + Статья 4.19. Незаконное уничтожение зеленых насаждений + + Незаконное уничтожение зеленых насаждений - + + влечет наложение административного штрафа на граждан в размере от четырех +тысяч до четырех тысяч пятисот рублей; на должностных лиц - пятидесяти тысяч +рублей; на юридических лиц - трехсот тысяч рублей. + + Статья 4.20. Незаконные сбор или торговля растениями и животными, +занесенными в Красную книгу города Москвы + + Незаконные сбор или торговля растениями и животными, занесенными в Красную +книгу города Москвы, - + + влечет наложение административного штрафа на граждан в размере от двух +тысяч пятисот до трех тысяч рублей; на должностных лиц - сорока тысяч рублей; на +юридических лиц - трехсот тысяч рублей. + + Статья 4.21. Уничтожение особо охраняемых объектов животного и +растительного мира + + 1. 
Уничтожение животных и растений на особо охраняемых природных +территориях в городе Москве - + + влечет наложение административного штрафа на граждан в размере от трех +тысяч пятисот до четырех тысяч пятисот рублей; на должностных лиц - пятидесяти +тысяч рублей; на юридических лиц - трехсот тысяч рублей. + + 2. Уничтожение или повреждение критических местообитаний видов животных и +растений, занесенных в Красную книгу города Москвы, - + + влечет наложение административного штрафа на граждан в размере от двух +тысяч пятисот до трех тысяч пятисот рублей; на должностных лиц - пятидесяти +тысяч рублей; на юридических лиц - трехсот тысяч рублей. + + 3. Уничтожение животных и растений, занесенных в Красную книгу города +Москвы, - + + влечет наложение административного штрафа на граждан в размере от двух +тысяч пятисот до трех тысяч пятисот рублей; на должностных лиц - пятидесяти +тысяч рублей; на юридических лиц - трехсот тысяч рублей. + + Статья 4.22. Невыполнение условий порубочного билета + + Невыполнение условий порубочного билета - + + влечет наложение административного штрафа на должностных лиц в размере +тридцати тысяч рублей; на юридических лиц - двухсот пятидесяти тысяч рублей. + + Статья 4.23. Осуществление пересадки зеленых насаждений без разрешения на +пересадку либо невыполнение условий разрешения на пересадку зеленых насаждений + + 1. Осуществление пересадки зеленых насаждений без разрешения на пересадку - + + влечет наложение административного штрафа на должностных лиц в размере +двадцати тысяч рублей; на юридических лиц - двухсот тысяч рублей. + + 2. Невыполнение условий разрешения на пересадку зеленых насаждений - + + влечет наложение административного штрафа на должностных лиц в размере +двадцати пяти тысяч рублей; на юридических лиц - двухсот тысяч рублей. + + Статья 4.24. 
Невыполнение обязанностей по организации и осуществлению +локального экологического мониторинга + + Невыполнение субъектами локального экологического мониторинга обязанностей +по организации и осуществлению локального экологического мониторинга, в том +числе по разработке программ локального экологического мониторинга, установке и +эксплуатации оборудования, необходимого для осуществления локального +экологического мониторинга, - + + влечет наложение административного штрафа на должностных лиц в размере +двадцати тысяч рублей; на юридических лиц - ста тысяч рублей. + + Статья 4.25. Невыполнение или несвоевременное выполнение обязанностей по +предоставлению данных экологического мониторинга и экстренной информации + + 1. Невыполнение или несвоевременное выполнение специализированными +организациями обязанностей по предоставлению данных экологического мониторинга и +экстренной информации в Единый городской фонд данных экологического мониторинга +- + + влечет наложение административного штрафа на юридических лиц в размере +сорока тысяч рублей. + + 2. Невыполнение или несвоевременное выполнение субъектами локального +экологического мониторинга обязанностей по предоставлению данных экологического +мониторинга - + + влечет наложение административного штрафа на должностных лиц в размере +пятнадцати тысяч рублей; на юридических лиц - семидесяти тысяч рублей. + + 3. Те же деяния, создающие угрозу жизни и здоровью людей, - + + влекут ответственность в соответствии с федеральным законодательством. + + Статья 4.26. Предоставление недостоверных данных экологического мониторинга + + Предоставление субъектами экологического мониторинга недостоверных данных +экологического мониторинга в Единый городской фонд данных экологического +мониторинга, не влекущее создания угрозы жизни и здоровью людей, - + + влечет наложение административного штрафа на должностных лиц в размере +пятнадцати тысяч рублей; на юридических лиц - ста тысяч рублей. + + Статья 4.27. 
Распространение (перепечатка) информации Единого городского +фонда данных экологического мониторинга без ссылки на Единый городской фонд +данных экологического мониторинга + + Распространение (перепечатка) или иное использование информации Единого +городского фонда данных экологического мониторинга без ссылки на Единый +городской фонд данных экологического мониторинга как на официальный источник +информации - + + влечет наложение административного штрафа на граждан в размере от двух +тысяч до трех тысяч рублей; на должностных лиц - пятнадцати тысяч рублей; на +юридических лиц - семидесяти тысяч рублей. + + Статья 4.28. Производство работ по строительству, реконструкции, +капитальному ремонту объектов и реконструкции зеленых насаждений с нарушениями +условий размещения информационных щитов + + Производство работ по строительству, реконструкции, капитальному ремонту +объектов и реконструкции зеленых насаждений с нарушениями условий размещения +информационных щитов, установленных правовыми актами города Москвы, - + + влечет наложение административного штрафа на должностных лиц в размере +двадцати тысяч рублей; на юридических лиц - ста тысяч рублей. + + Статья 4.29. Повреждение информационных щитов + + Уничтожение, повреждение или снос информационных щитов на особо охраняемых +природных территориях в городе Москве - + + влечет наложение административного штрафа на граждан в размере от двух +тысяч до двух тысяч пятисот рублей; на должностных лиц - десяти тысяч рублей; на +юридических лиц - ста тысяч рублей. + + Примечание. Под информационными щитами на особо охраняемых природных +территориях следует понимать материальные носители с текстовой информацией об +особо охраняемых природных территориях и (или) со схематичными изображениями +(картами, планами, схемами), рисунками эколого-просветительского характера. + + Статья 4.30. Неправомерная маркировка знаком соответствия "Экологичный +продукт" + + (Утратила силу - Закон Москвы от 03.04.2013 г. № 13) + + Статья 4.31. 
Нарушение индивидуальных условий комплексного +природопользования + + (Утратила силу - Закон Москвы от 25.05.2016 г. № 26) + + Статья 4.32. Осуществление деятельности без утвержденных индивидуальных +условий комплексного природопользования + + (Утратила силу - Закон Москвы от 25.05.2016 г. № 26) + + Статья 4.33. Неосуществление раздельного сбора отходов + + Неосуществление раздельного сбора отходов - + + влечет наложение административного штрафа на должностных лиц в размере +сорока тысяч рублей; на юридических лиц - двухсот пятидесяти тысяч рублей. + + Статья 4.34. Нарушение требований по раздельному сбору вторичных +материальных ресурсов, подлежащих переработке (обработке) во вторичное сырье + + 1. Нарушение требований по раздельному сбору вторичных материальных +ресурсов, подлежащих переработке (обработке) во вторичное сырье, - + + влечет наложение административного штрафа на должностных лиц в размере +тридцати тысяч рублей; на юридических лиц - двухсот тысяч рублей. + + 2. Неосуществление раздельного сбора вторичных материальных ресурсов, +подлежащих переработке (обработке) во вторичное сырье, - + + влечет наложение административного штрафа на должностных лиц в размере +сорока тысяч рублей; на юридических лиц - двухсот пятидесяти тысяч рублей. + + Статья 4.35. Нарушение требований по транспортированию и (или) размещению +отходов строительства и сноса + + (Утратила силу - Закон Москвы от 13.03.2013 г. № 11) + + Статья 4.36. Нарушение порядка ведения Сводного кадастра отходов +производства и потребления города Москвы + + 1. Непредставление, несвоевременное представление данных, необходимых для +ведения Сводного кадастра отходов производства и потребления города Москвы, +лицом, обязанным в соответствии с правовыми актами города Москвы направлять +такие данные, - + + влечет наложение административного штрафа на должностных лиц в размере +десяти тысяч рублей; на юридических лиц - ста тысяч рублей. + + 2. 
Внесение недостоверных данных в Сводный кадастр отходов производства и +потребления города Москвы - + + влечет наложение административного штрафа на должностных лиц в размере +двадцати тысяч рублей. + + Статья 4.37. Ввоз на территорию города Москвы отходов, не являющихся +вторичными материальными ресурсами + + Ввоз на территорию города Москвы отходов, не являющихся вторичными +материальными ресурсами, - + + влечет наложение административного штрафа на граждан в размере от двух +тысяч пятисот до трех тысяч пятисот рублей; на должностных лиц - тридцати тысяч +рублей; на юридических лиц - трехсот тысяч рублей. (В редакции Закона Москвы от +07.10.2009 г. № 42) + + Статья 4.38. Несоблюдение установленных в заключении органа исполнительной +власти города Москвы, осуществляющего государственное управление в области +охраны окружающей среды, требований и условий + + Несоблюдение установленных в заключении органа исполнительной власти города +Москвы, осуществляющего государственное управление в области охраны окружающей +среды, требований и условий - + + влечет наложение административного штрафа на должностных лиц в размере +сорока тысяч рублей; на юридических лиц - двухсот пятидесяти тысяч рублей. + + Статья 4.39. Нарушение правил и требований в области охраны окружающей +среды + + Нарушение установленных правовыми актами города Москвы правил и требований +в области охраны окружающей среды - + + влечет наложение административного штрафа на граждан в размере от трех +тысяч до четырех тысяч рублей; на должностных лиц - пятидесяти тысяч рублей; на +юридических лиц - трехсот тысяч рублей. + + Статья 4.40. 
Непредставление экологической информации (сведений) + + Непредставление в орган исполнительной власти города Москвы, осуществляющий +государственное управление в области охраны окружающей среды в городе Москве, +его должностным лицам в установленный в запросе срок экологической информации +(сведений), а равно представление такой информации (сведений) не по всем +поставленным в запросе вопросам - + + влечет наложение административного штрафа на должностных лиц в размере +двадцати тысяч рублей; на юридических лиц - ста пятидесяти тысяч рублей. + + Примечание. Под экологической информацией следует понимать информацию о +состоянии окружающей среды и природных ресурсов, об источниках загрязнения +окружающей среды и природных ресурсов или иного вредного воздействия на +окружающую среду и природные ресурсы, а также о деятельности в области охраны +окружающей среды. (В редакции Закона Москвы от 20.05.2009 г. № 12) + + Статья 4.41. Размещение транспортных средств на территории, занятой +зелеными насаждениями + + (Утратила силу - Закон Москвы от 13.05.2015 г. № 26) + + Статья 4.42. Нарушение экологических требований по обустройству мест +временного хранения отходов + + Несоблюдение экологических требований, установленных правовыми актами +города Москвы, по обустройству мест временного хранения отходов субъектами +хозяйственной и иной деятельности на занимаемых земельных участках - + + влечет наложение административного штрафа на должностных лиц в размере +двадцати тысяч рублей; на юридических лиц - ста пятидесяти тысяч рублей. + + Статья 4.43. Нарушение требований к информационным щитам на строительных +площадках города Москвы + + 1. Отсутствие на строительных площадках города Москвы информационных щитов, +содержащих экологическую информацию, - + + влечет наложение административного штрафа на должностных лиц в размере +двадцати тысяч рублей; на юридических лиц - ста тысяч рублей. + + 2. 
Нарушение условий размещения информационных щитов на строительных +площадках города Москвы либо нарушение требований об указании на информационных +щитах, размещенных на строительных площадках города Москвы, информации об +условиях ведения строительных работ в ночное время и иной обязательной +информации, установленной правовыми актами города Москвы, - + + влечет наложение административного штрафа на должностных лиц в размере +пятнадцати тысяч рублей; на юридических лиц - восьмидесяти тысяч рублей. + + Статья 4.44. Осуществление хозяйственной и иной деятельности в границах +особо охраняемой природной территории в отсутствие охранного обязательства либо +с нарушением условий охранного обязательства + + (Утратила силу - Закон Москвы от 12.12.2012 г. № 65) + + Статья 4.45. Нарушение экологических требований к уровню шума на особо +охраняемых природных территориях + + Нарушение установленных правовыми актами города Москвы экологических +требований к уровню шума на особо охраняемых природных территориях города Москвы +- + + влечет наложение административного штрафа на граждан в размере от трех +тысяч до четырех тысяч рублей; на должностных лиц - тридцати тысяч рублей; на +юридических лиц - трехсот тысяч рублей. + + Статья 4.46. Нарушение условий производства подготовительных, земляных, +строительных и иных работ в ночное время, повлекшее превышение допустимого +уровня шума + + Нарушение установленных правовыми актами города Москвы условий производства +подготовительных, земляных, строительных и иных работ в ночное время, повлекшее +превышение допустимого уровня шума, за исключением случаев, предусмотренных +статьей 4.50 настоящего Кодекса, - (В редакции Закона Москвы от 17.12.2014 г. № +61) + + влечет наложение административного штрафа на должностных лиц в размере +сорока тысяч рублей; на юридических лиц - трехсот тысяч рублей. + + Статья 4.47. 
Неправомерный въезд, передвижение либо размещение +транспортного средства на особо охраняемой природной территории + + (Утратила силу - Закон Москвы от 12.12.2012 г. № 65) + + Статья 4.48. Нарушения в области охраны и рационального использования +городских почв + + Нарушения в области охраны и рационального использования городских почв, в +том числе загрязнение, захламление, запечатывание территории без положительного +заключения органа исполнительной власти города Москвы, осуществляющего +государственное управление в области охраны окружающей среды в городе Москве, а +также несоблюдение требований и условий, содержащихся в заключении, - + + влекут наложение административного штрафа на граждан в размере от двух +тысяч до четырех тысяч рублей; на должностных лиц - тридцати тысяч рублей; на +юридических лиц - трехсот тысяч рублей. + + (Статья в редакции Закона Москвы от 07.10.2009 г. № 42) + + Статья 4.49. Невыполнение в срок законного предписания государственного +инспектора города Москвы по охране природы + + (Утратила силу - Закон Москвы от 12.12.2012 г. № 65) + + Статья 4.50. Нарушение условий производства разгрузочно-погрузочных работ +на торговых объектах, встроенных в многоквартирные дома либо пристроенных +(встроенно-пристроенных) к многоквартирным домам, в ночное время + + 1. Освещение фасадов многоквартирных домов прожекторами и (или) фарами +транспортного средства, оставление двигателя транспортного средства работающим +при производстве разгрузочно-погрузочных работ на торговых объектах, встроенных +в многоквартирные дома либо пристроенных (встроенно-пристроенных) к +многоквартирным домам, в ночное время - + + влечет предупреждение или наложение административного штрафа на граждан в +размере от одной тысячи до двух тысяч рублей; на должностных лиц - от четырех +тысяч до восьми тысяч рублей; на юридических лиц - от сорока тысяч до +восьмидесяти тысяч рублей. + + 2. 
Нарушение установленных нормативными правовыми актами города Москвы +условий производства разгрузочно-погрузочных работ на торговых объектах, +встроенных в многоквартирные дома либо пристроенных (встроенно-пристроенных) к +многоквартирным домам, в ночное время, повлекшее превышение допустимого уровня +шума, - + + влечет предупреждение или наложение административного штрафа на граждан в +размере от одной тысячи до двух тысяч рублей; на должностных лиц - от четырех +тысяч до восьми тысяч рублей; на юридических лиц - от сорока тысяч до +восьмидесяти тысяч рублей. + + (Статья дополнена - Закон Москвы от 17.12.2014 г. № 61) + + Глава 5. Административные правонарушения в области обращения с животными + + Статья 5.1. Нарушение правил содержания домашних животных + + 1. Содержание домашних животных в местах общего пользования коммунальных +квартир и многоквартирных домов - + + влечет предупреждение или наложение административного штрафа на граждан в +размере от одной тысячи до двух тысяч рублей. (В редакции Закона Москвы от +07.10.2009 г. № 42) + + 2. Допущение загрязнения домашними животными мест общего пользования в +многоквартирных домах, а также общественных мест - + + влечет наложение административного штрафа на граждан в размере от одной +тысячи до двух тысяч рублей; на должностных лиц - от двух тысяч до трех тысяч +рублей. (В редакции Закона Москвы от 07.10.2009 г. № 42) + + 3. Нарушение установленных правовыми актами города Москвы правил выгула +собак, в том числе появление с собакой без поводка и намордника в магазинах, +учреждениях, на детских площадках, рынках, пляжах и в транспорте, а также выгул +собак на территориях учреждений здравоохранения, детских садов, школ, иных +образовательных учреждений и учреждений, работающих с несовершеннолетними, - + + влечет наложение административного штрафа на граждан или должностных лиц в +размере от одной тысячи до двух тысяч рублей. (В редакции Закона Москвы от +07.10.2009 г. № 42) + + 4. 
Появление с собакой без поводка на природных и озелененных территориях, +а также на особо охраняемых природных территориях, если это деяние не содержит +признаков административного правонарушения, предусмотренного статьей 4.2 +настоящего Кодекса, - + + влечет наложение административного штрафа на граждан в размере от одной +тысячи до двух тысяч рублей; на должностных лиц - от двух тысяч до трех тысяч +рублей. (В редакции Закона Москвы от 07.10.2009 г. № 42) + + 5. Допущение нападения домашнего животного на другое домашнее животное, +повлекшего увечье или гибель последнего, - + + влечет наложение административного штрафа на граждан или должностных лиц в +размере от четырех тысяч до пяти тысяч рублей. (В редакции Закона Москвы от +07.10.2009 г. № 42) + + 6. Причинение домашним животным дикому животному увечья или гибели - + + влечет наложение административного штрафа на граждан - владельцев домашних +животных в размере от четырех тысяч до пяти тысяч рублей. (В редакции Закона +Москвы от 07.10.2009 г. № 42) + + 7. Допущение по неосторожности нападения домашнего животного на человека с +причинением вреда здоровью человека, если это деяние не содержит признаков +преступления, предусмотренного статьей 118 Уголовного кодекса Российской +Федерации, - + + влечет наложение административного штрафа на граждан в размере от четырех +тысяч до пяти тысяч рублей; на должностных лиц - от четырех тысяч до десяти +тысяч рублей; на юридических лиц - от десяти тысяч до шестидесяти тысяч рублей. +(В редакции Закона Москвы от 07.10.2009 г. № 42) + + 8. Натравливание домашнего животного на людей или животных - + + влечет наложение административного штрафа на граждан в размере от двух +тысяч до пяти тысяч рублей; на должностных лиц - от пяти тысяч до десяти тысяч +рублей. (В редакции Закона Москвы от 07.10.2009 г. № 42) + + 9. 
Причинение ущерба чужому имуществу физическим воздействием домашнего +животного - + + влечет наложение административного штрафа на граждан в размере от пятисот +до одной тысячи рублей; на должностных лиц - от двух тысяч до трех тысяч рублей; +на юридических лиц - от шести тысяч до десяти тысяч рублей. (В редакции Закона +Москвы от 07.10.2009 г. № 42) + + Примечание. К диким животным по смыслу настоящей статьи не относятся +животные, в отношении которых осуществляются профилактические мероприятия +(дезинфекция, дератизация, дезинсекция). + + Статья 5.2. Нарушение правил транспортировки (передвижения), содержания и +использования лошадей в городе Москве + + (Утратила силу - Закон Москвы от 20.09.2017 г. № 35) + + Статья 5.3. Нарушение порядка провоза (транспортировки) домашних животных + + Нарушение порядка провоза домашних животных городским общественным +транспортом, а равно порядка их транспортировки в городе Москве - (В редакции +Закона Москвы от 20.09.2017 г. № 35) + + влечет наложение административного штрафа на граждан в размере от пятисот +до одной тысячи рублей; на должностных лиц - от двух тысяч до четырех тысяч +рублей; на юридических лиц - от четырех тысяч до шести тысяч рублей. (В редакции +Закона Москвы от 07.10.2009 г. № 42) + + Статья 5.4. Нарушение порядка проведения мероприятий с участием животных + + Нарушение установленного Правительством Москвы порядка проведения +мероприятий с участием животных в городе Москве - + + влечет наложение административного штрафа на должностных лиц в размере от +пятисот до двух тысяч рублей; на юридических лиц - от одной тысячи пятисот до +двух тысяч пятисот рублей. + + Статья 5.5. Нарушение порядка регулирования численности, отлова и +содержания безнадзорных домашних животных + + 1. 
Нарушение установленного правовыми актами города Москвы порядка +регулирования численности безнадзорных домашних животных - + + влечет наложение административного штрафа на граждан в размере от двух +тысяч до двух тысяч пятисот рублей; на должностных лиц - от трех тысяч до +четырех тысяч рублей; на юридических лиц - от тридцати тысяч до сорока тысяч +рублей. + + 2. Непринятие соответствующими организациями мер по отлову безнадзорных +домашних животных на подведомственных им территориях - + + влечет наложение административного штрафа на должностных лиц в размере от +одной тысячи до двух тысяч рублей. + + 3. Нарушение установленных Правительством Москвы правил отлова и содержания +безнадзорных домашних животных - + + влечет наложение административного штрафа на должностных лиц в размере от +одной тысячи до двух тысяч рублей; на юридических лиц - от двух тысяч до трех +тысяч рублей. + + Статья 5.6. Уклонение от регистрации и вакцинации собак и кошек + + Уклонение от регистрации и вакцинации собак и кошек против бешенства в +ветеринарных учреждениях - + + влечет наложение административного штрафа на граждан в размере от двух +тысяч до трех тысяч рублей; на должностных лиц - от четырех тысяч до шести тысяч +рублей. (В редакции Закона Москвы от 07.10.2009 г. № 42) + + Статья 5.7. Совершение жестоких действий в отношении животных + + 1. Жестокое обращение с животным, повлекшее его гибель или увечье, если это +деяние не содержит признаков преступления, предусмотренного статьей 245 +Уголовного кодекса Российской Федерации, - + + влечет наложение административного штрафа на граждан в размере от двух +тысяч до двух тысяч пятисот рублей; на должностных лиц - от четырех тысяч до +пяти тысяч рублей; на юридических лиц - от пятнадцати тысяч до двадцати тысяч +рублей. + + 2. 
Содержание или транспортировка животного в условиях, приводящих к потере +его здоровья, не соответствующих его биологическим особенностям и требованиям +ветеринарно-санитарных правил, прекращение владельцем животного его +жизнеобеспечения - + + влечет наложение административного штрафа на граждан в размере от двух +тысяч до двух тысяч пятисот рублей; на должностных лиц - от четырех тысяч до +пяти тысяч рублей; на юридических лиц - от пятнадцати тысяч до двадцати тысяч +рублей. + + 3. Проведение на животном эксперимента без обезболивания или выведение +животного из эксперимента причиняющими боль методами - + + влечет наложение административного штрафа на граждан в размере от двух +тысяч до двух тысяч пятисот рублей; на должностных лиц - от четырех тысяч до +пяти тысяч рублей; на юридических лиц - от пятнадцати тысяч до двадцати тысяч +рублей. + + 4. Умерщвление животного, за исключением случаев, в которых +законодательством города Москвы допускается умерщвление животного, - + + влечет наложение административного штрафа на граждан в размере от двух +тысяч до двух тысяч пятисот рублей; на должностных лиц - от четырех тысяч до +пяти тысяч рублей; на юридических лиц - от пятнадцати тысяч до двадцати тысяч +рублей. + + 5. Содержание домашних животных в целях использования их шкур и мяса (за +исключением сельскохозяйственных животных), а равно проведение боев животных, в +том числе с участием человека, - + + влечет наложение административного штрафа на граждан в размере от двух +тысяч до двух тысяч пятисот рублей; на должностных лиц - от четырех тысяч до +пяти тысяч рублей; на юридических лиц - от двадцати тысяч до тридцати тысяч +рублей. + + Статья 5.8. 
Нарушение порядка захоронения или утилизации трупов домашних +животных + + Нарушение установленного правовыми актами города Москвы порядка захоронения +или утилизации трупов домашних животных - + + влечет наложение административного штрафа на граждан в размере от ста до +пятисот рублей; на должностных лиц - от одной тысячи до двух тысяч рублей; на +юридических лиц - от пяти тысяч до десяти тысяч рублей. + + Глава 6. Административные правонарушения в области землепользования + + Статья 6.1. Превышение сроков проектирования объектов + + Превышение заказчиками (инвесторами), собственниками, землевладельцами, +землепользователями и арендаторами земельных участков установленных сроков +проектирования объектов - + + влечет наложение административного штрафа на граждан в размере от одной +тысячи до трех тысяч рублей; на должностных лиц - от десяти тысяч до тридцати +тысяч рублей; на юридических лиц - от пятидесяти тысяч до ста пятидесяти тысяч +рублей. (В редакции Закона Москвы от 24.06.2015 г. № 32) + + Статья 6.2. Длительное неосвоение земельного участка + + 1. Неосвоение земельного участка более трех лет или в сроки, установленные +договором аренды земельного участка, - (В редакции Закона Москвы от 24.06.2015 +г. № 32) + + влечет наложение административного штрафа на граждан в размере от одной +тысячи до трех тысяч рублей; на должностных лиц - от десяти тысяч до тридцати +тысяч рублей; на юридических лиц - от пятидесяти тысяч до ста пятидесяти тысяч +рублей. (В редакции Закона Москвы от 24.06.2015 г. № 32) + + 2. 
Неустранение ранее выявленного административного правонарушения, +предусмотренного частью 1 настоящей статьи, в течение 30 дней с момента +вступления в законную силу постановления о назначении административного +наказания за данное правонарушение, а равно повторное совершение +административного правонарушения, предусмотренного частью 1 настоящей статьи, - +(В редакции Закона Москвы от 29.04.2019 № 15) + + влечет наложение административного штрафа на граждан в размере от двух +тысяч до пяти тысяч рублей; на должностных лиц - от двадцати тысяч до пятидесяти +тысяч рублей; на юридических лиц - от ста тысяч до трехсот тысяч рублей. + + (Часть в редакции Закона Москвы от 24.06.2015 г. № 32) + + Статья 6.3. Превышение сроков строительства на земельном участке + + 1. Превышение заказчиками (инвесторами), собственниками, землевладельцами, +землепользователями и арендаторами земельных участков установленных сроков +строительства на земельном участке - (В редакции Закона Москвы от 24.06.2015 г. +№ 32) + + влечет наложение административного штрафа на граждан в размере от трех +тысяч до пяти тысяч рублей; на должностных лиц - от двадцати тысяч до пятидесяти +тысяч рублей; на юридических лиц - от ста тысяч до трехсот тысяч рублей. (В +редакции Закона Москвы от 24.06.2015 г. № 32) + + 2. (Утратила силу - Закон Москвы от 24.06.2015 г. № 32) + + Статья 6.4. Воспрепятствование законному пользованию земельным участком + + (Утратила силу - Закон Москвы от 24.06.2015 г. № 32) + + Статья 6.5. Самовольное ограничение доступа на земельные участки общего +пользования + + 1. Самовольное ограничение доступа на земельные участки общего пользования +- (В редакции Закона Москвы от 10.03.2010 г. № 6) + + влечет наложение административного штрафа на граждан в размере от одной +тысячи до трех тысяч рублей; на должностных лиц - от десяти тысяч до тридцати +тысяч рублей; на юридических лиц - от пятидесяти тысяч до ста пятидесяти тысяч +рублей. (В редакции Закона Москвы от 24.06.2015 г. № 32) + + 2. 
Неустранение ранее выявленного административного правонарушения, +предусмотренного частью 1 настоящей статьи, в течение 30 дней с момента +вступления в законную силу постановления о назначении административного +наказания за данное правонарушение, а равно повторное совершение +административного правонарушения, предусмотренного частью 1 настоящей статьи, - +(В редакции Закона Москвы от 29.04.2019 № 15) + + влечет наложение административного штрафа на граждан в размере от двух +тысяч до пяти тысяч рублей; на должностных лиц - от двадцати тысяч до пятидесяти +тысяч рублей; на юридических лиц - от ста тысяч до трехсот тысяч рублей. (В +редакции Закона Москвы от 24.06.2015 г. № 32) + + (Часть дополнена - Закон Москвы от 10.03.2010 г. № 6) + + Статья 6.6. Нарушение порядка согласования распоряжения земельным участком, +находящимся в собственности города Москвы, или земельным участком, собственность +на который не разграничена + + 1. Нарушение установленного Правительством Москвы порядка согласования +распоряжения земельным участком, находящимся в собственности города Москвы, или +земельным участком, государственная собственность на который не разграничена, - + + влечет наложение административного штрафа на граждан в размере от одной +тысячи до трех тысяч рублей; на должностных лиц - от двадцати тысяч до +пятидесяти тысяч рублей; на юридических лиц - от пятидесяти тысяч до ста +пятидесяти тысяч рублей. + + (Часть в редакции Закона Москвы от 24.06.2015 г. № 32) + + 2. 
Неустранение ранее выявленного административного правонарушения, +предусмотренного частью 1 настоящей статьи, в течение 60 дней с момента +вступления в законную силу постановления о назначении административного +наказания за данное правонарушение, а равно повторное совершение +административного правонарушения, предусмотренного частью 1 настоящей статьи, - +(В редакции Закона Москвы от 29.04.2019 № 15) + + влечет наложение административного штрафа на граждан в размере от двух +тысяч до пяти тысяч рублей; на должностных лиц - от тридцати тысяч до пятидесяти +тысяч рублей; на юридических лиц - от ста тысяч до трехсот тысяч рублей. (В +редакции Закона Москвы от 24.06.2015 г. № 32) + + (Часть дополнена - Закон Москвы от 10.03.2010 г. № 6) + + Статья 6.7. Нарушение разрешенного использования земельного участка + + 1. Нарушение требований и ограничений по использованию земельного участка, +установленных правовыми актами города Москвы, правоустанавливающими документами +на землю, сервитутами, - (В редакции законов Москвы от 10.03.2010 № 6, от +29.04.2019 № 15) + + влечет наложение административного штрафа на граждан в размере от одной +тысячи до трех тысяч рублей; на должностных лиц - от двадцати тысяч до +пятидесяти тысяч рублей; на юридических лиц - от пятидесяти тысяч до ста +пятидесяти тысяч рублей. (В редакции Закона Москвы от 24.06.2015 г. № 32) + + 1.1. 
Нарушение требований и ограничений по использованию земельного +участка, связанных со строительством, с реконструкцией на нем здания, строения, +сооружения, установленных законами города Москвы, иными нормативными правовыми +актами города Москвы, правоустанавливающими документами на землю, проектной и +иной документацией, определяющей условия использования земельного участка, - + + влечет наложение административного штрафа на граждан в размере от 0,5 до 1 +процента кадастровой стоимости земельного участка; на должностных лиц - от 1 до +1,5 процента кадастровой стоимости земельного участка; на юридических лиц - от +1,5 до 2 процентов кадастровой стоимости земельного участка. + + (Дополнен частью - Закон Москвы от 29.04.2019 № 15) + + 2. Неустранение ранее выявленного административного правонарушения, +предусмотренного частями 1 и 1.1 настоящей статьи, в течение 60 дней с момента +вступления в законную силу постановления о назначении административного +наказания за данное правонарушение, а равно повторное совершение +административного правонарушения, предусмотренного частями 1 и 1.1 настоящей +статьи, - (В редакции Закона Москвы от 29.04.2019 № 15) + + влечет наложение административного штрафа на граждан в размере от двух +тысяч до пяти тысяч рублей; на должностных лиц - от тридцати тысяч до пятидесяти +тысяч рублей; на юридических лиц - от ста тысяч до трехсот тысяч рублей. (В +редакции Закона Москвы от 24.06.2015 г. № 32) + + (Часть дополнена - Закон Москвы от 10.03.2010 г. № 6) + + Статья 6.8. Нарушение установленного режима использования земель + + 1. Нарушение установленного правовыми актами города Москвы, документами о +правах на землю режима использования земель особо охраняемых природных +территорий, земель природоохранного, рекреационного, историко-культурного +назначения и других земель с особыми условиями использования - (В редакции +Закона Москвы от 07.10.2009 г. 
№ 42) + + влечет наложение административного штрафа на граждан в размере от одной +тысячи до трех тысяч рублей; на должностных лиц - от двадцати тысяч до +пятидесяти тысяч рублей; на юридических лиц - от пятидесяти тысяч до ста +пятидесяти тысяч рублей. (В редакции Закона Москвы от 24.06.2015 г. № 32) + + 2. Неустранение ранее выявленного административного правонарушения, +предусмотренного частью 1 настоящей статьи, в течение 60 дней с момента +вступления в законную силу постановления о назначении административного +наказания за данное правонарушение, а равно повторное совершение +административного правонарушения, предусмотренного частью 1 настоящей статьи, - +(В редакции Закона Москвы от 29.04.2019 № 15) + + влечет наложение административного штрафа на граждан в размере от двух +тысяч до пяти тысяч рублей; на должностных лиц - от тридцати тысяч до пятидесяти +тысяч рублей; на юридических лиц - от ста тысяч до трехсот тысяч рублей. (В +редакции Закона Москвы от 24.06.2015 г. № 32) + + (Часть дополнена - Закон Москвы от 10.03.2010 г. № 6) + + Статья 6.9. Захламление земельных участков + + 1. Захламление земельных участков - (В редакции Закона Москвы от 24.06.2015 +г. № 32) + + влечет наложение административного штрафа на граждан в размере от одной +тысячи до трех тысяч рублей; на должностных лиц - от десяти тысяч до тридцати +тысяч рублей; на юридических лиц - от пятидесяти тысяч до ста пятидесяти тысяч +рублей. (В редакции Закона Москвы от 24.06.2015 г. № 32) + + 2. 
Неустранение ранее выявленного административного правонарушения, +предусмотренного частью 1 настоящей статьи, в течение 30 дней с момента +вступления в законную силу постановления о назначении административного +наказания за данное правонарушение, а равно повторное совершение +административного правонарушения, предусмотренного частью 1 настоящей статьи, - +(В редакции Закона Москвы от 29.04.2019 № 15) + + влечет наложение административного штрафа на граждан в размере от двух +тысяч до пяти тысяч рублей; на должностных лиц - от двадцати тысяч до пятидесяти +тысяч рублей; на юридических лиц - от ста тысяч до трехсот тысяч рублей. + + (Часть в редакции Закона Москвы от 24.06.2015 г. № 32) + + Статья 6.10. Самовольное перемещение межевых знаков границ земельных +участков + + (Утратила силу - Закон Москвы от 24.06.2015 г. № 32) + + Статья 6.11. Использование земельного участка с нарушением установленных +нормативными правовыми актами города Москвы требований к оформлению документов + + 1. Использование земельного участка, находящегося в собственности города +Москвы, или земельного участка, государственная собственность на который не +разграничена, с нарушением установленных нормативными правовыми актами города +Москвы требований к оформлению документов, являющихся основанием для +использования таких земельных участков, за исключением случаев, предусмотренных +статьей 6.13 настоящего Кодекса, - + + влечет наложение административного штрафа на граждан в размере трех тысяч +рублей; на должностных лиц - от десяти тысяч до тридцати тысяч рублей; на +юридических лиц - от пятидесяти тысяч до ста пятидесяти тысяч рублей. + + 2. 
Неустранение ранее выявленного административного правонарушения, +предусмотренного частью 1 настоящей статьи, в течение 30 дней с момента +вступления в законную силу постановления о назначении административного +наказания за данное правонарушение, а равно повторное совершение +административного правонарушения, предусмотренного частью 1 настоящей статьи, - +(В редакции Закона Москвы от 29.04.2019 № 15) + + влечет наложение административного штрафа на граждан в размере пяти тысяч +рублей; на должностных лиц - от тридцати тысяч до пятидесяти тысяч рублей; на +юридических лиц - от ста тысяч до трехсот тысяч рублей. + + (Статья в редакции Закона Москвы от 24.06.2015 г. № 32) + + Статья 6.12. Использование земельного участка без оформленных в +установленном порядке правоустанавливающих документов на землю + + (Утратила силу - Закон Москвы от 24.06.2015 г. № 32) + + Статья 6.13. Нарушение требований к размещению сезонных (летних) кафе + + 1. Нарушение установленных Правительством Москвы сроков монтажа, демонтажа, +размещения сезонных (летних) кафе - + + влечет наложение административного штрафа на должностных лиц в размере +тридцати тысяч рублей; на юридических лиц - ста тысяч рублей. + + 2. Размещение сезонного (летнего) кафе, включенного в схему размещения +сезонных (летних) кафе при стационарных предприятиях общественного питания, с +превышением площади и (или) нарушением места размещения, установленных указанной +схемой, - + + влечет наложение административного штрафа на должностных лиц в размере +тридцати тысяч рублей; на юридических лиц - ста пятидесяти тысяч рублей. + + 3. Размещение сезонного (летнего) кафе, не включенного в схему размещения +сезонных (летних) кафе при стационарных предприятиях общественного питания, - + + влечет наложение административного штрафа на должностных лиц в размере +тридцати тысяч рублей; на юридических лиц - двухсот тысяч рублей. + + 4. 
Действия, предусмотренные частью 3 настоящей статьи, совершенные в +границах улично-дорожной сети, - + + влекут наложение административного штрафа на должностных лиц в размере +тридцати тысяч рублей; на юридических лиц - трехсот тысяч рублей. + + 5. Неустранение ранее выявленного административного правонарушения, +предусмотренного частями 1 - 4 настоящей статьи, в течение 45 дней с момента +вступления в законную силу постановления о назначении административного +наказания за данное правонарушение, а равно повторное совершение +административного правонарушения, предусмотренного частями 1 - 4 настоящей +статьи, - (В редакции Закона Москвы от 29.04.2019 № 15) + + влечет наложение административного штрафа на должностных лиц в размере +пятидесяти тысяч рублей; на юридических лиц - пятисот тысяч рублей. + + (Статья в редакции Закона Москвы от 19.12.2018 № 32) + + Глава 7. Административные правонарушения в области градостроительства и +эксплуатации объектов городской инфраструктуры + + Статья 7.1. Нарушение нормативов градостроительного проектирования + + 1. Нарушение нормативов градостроительного проектирования на исторических +территориях города Москвы, территориях зон охраны объектов культурного наследия +в городе Москве - + + влечет наложение административного штрафа на граждан в размере от одной +тысячи до одной тысячи пятисот рублей; на должностных лиц - от одной тысячи до +двух тысяч пятисот рублей; на юридических лиц - от десяти тысяч до двадцати +тысяч рублей. + + 2. Нарушение нормативов градостроительного проектирования города Москвы на +территориях, не указанных в части 1 настоящей статьи, - + + влечет наложение административного штрафа на граждан в размере от восьмисот +до одной тысячи рублей; на должностных лиц - от одной тысячи до одной тысячи +пятисот рублей; на юридических лиц - от одной тысячи пятисот до двух тысяч +пятисот рублей. + + Статья 7.2. 
Нарушение правил проведения архитектурных и градостроительных +конкурсов + + Нарушение установленных Правительством Москвы правил проведения +архитектурных и градостроительных конкурсов в городе Москве - + + влечет наложение административного штрафа на должностных лиц в размере от +двух тысяч до трех тысяч рублей; на юридических лиц - от четырех тысяч до шести +тысяч рублей. + + Статья 7.3. Нарушение требований по обеспечению благоприятной среды +жизнедеятельности в период строительства, реконструкции, комплексного +капитального ремонта градостроительных объектов + + Нарушение установленных законодательством города Москвы требований по +обеспечению благоприятной среды жизнедеятельности в период строительства, +реконструкции, комплексного капитального ремонта градостроительных объектов - + + влечет наложение административного штрафа на граждан в размере от одной +тысячи до двух тысяч рублей; на должностных лиц - от трех тысяч до пяти тысяч +рублей; на юридических лиц - от десяти тысяч до двадцати тысяч рублей. + + Статья 7.4. Нарушение норм и правил подготовки и проведения земляных, +строительных и дорожных работ + + (Утратила силу - Закон Москвы от 13.03.2013 г. № 11) + + Статья 7.5. Нарушение правил содержания и эксплуатации автомобильных дорог +и технических средств их обустройства + + (Утратила силу - Закон Москвы от 13.03.2013 г. № 11) + + Статья 7.6. Устройство искусственных дорожных неровностей на проезжей части +и на прилегающей к ней территории в нарушение порядка, установленного +Правительством Москвы + + (Утратила силу - Закон Москвы от 13.03.2013 г. № 11) + + Статья 7.7. Нарушение технологии ремонта дорожного покрытия + + (Утратила силу - Закон Москвы от 13.03.2013 г. № 11) + + Статья 7.8. Нарушение технологии производства асфальтобетонных смесей + + 1. Использование некачественных исходных материалов при изготовлении +асфальтобетонных смесей - + + влечет наложение административного штрафа на юридических лиц в размере +стоимости изготовленной смеси. 
+ + 2. Несоответствие выпускаемых асфальтобетонных смесей требованиям +государственных стандартов (по результатам лабораторных испытаний) - + + влечет наложение административного штрафа на юридических лиц в размере +стоимости произведенной смеси. + + 3. Нарушение температурного режима при изготовлении асфальто-бетонных +смесей - + + влечет наложение административного штрафа на должностных лиц в размере от +трех тысяч до пяти тысяч рублей; на юридических лиц - от сорока тысяч до +пятидесяти тысяч рублей. + + (Часть в редакции Закона Москвы от 07.10.2009 г. № 42) + + 4. Отсутствие данных лабораторных испытаний или проведение их с нарушением +установленных требований - + + влечет наложение административного штрафа на должностных лиц в размере от +двух тысяч до трех тысяч рублей; на юридических лиц - от десяти тысяч до +тридцати тысяч рублей. + + (Часть дополнена - Закон Москвы от 07.10.2009 г. № 42) + + Статья 7.9. Самовольная установка ограждений, отсутствие ограждений в +местах разрытий или иных опасных местах + + 1. Установка ограждений или перекрытие ими проезжей части дорог без +разрешения, выданного уполномоченным Правительством Москвы органом, а равно +установка иных устройств, препятствующих движению транспорта и граждан, - + + влечет предупреждение или наложение административного штрафа на граждан в +размере от пятисот до одной тысячи рублей; на должностных лиц - от одной тысячи +до трех тысяч рублей; на юридических лиц - от десяти тысяч до двадцати тысяч +рублей. + + (Часть в редакции Закона Москвы от 07.10.2009 г. № 42) + + 2. Отсутствие ограждений или перекрытий ими проезжей части дорог в местах +разрытий или иных опасных местах - + + влечет наложение административного штрафа на должностных лиц в размере от +двух тысяч пятисот до четырех тысяч рублей; на юридических лиц - от пятнадцати +тысяч до двадцати пяти тысяч рублей. + + Статья 7.10. 
Нарушение правил пользования системами городского водопровода +и канализации + + (Утратила силу - Закон Москвы от 07.10.2009 г. № 42) + + Статья 7.11. Нарушение правил содержания и эксплуатации инженерных +коммуникаций и сооружений + + 1. Нарушение установленных Правительством Москвы правил содержания и +эксплуатации инженерных коммуникаций и сооружений - + + влечет наложение административного штрафа на должностных лиц в размере от +трех тысяч до четырех тысяч рублей; на юридических лиц - от десяти тысяч до +двадцати тысяч рублей. + + 2. Непринятие надлежащих мер по устранению дефектов инженерных коммуникаций +и сооружений, представляющих угрозу жизни или здоровью людей, в том числе +непринятие мер по устранению неисправностей колодцев и неисправностей люков на +колодцах, восстановлению провалившихся люков на колодцах, - + + влечет наложение административного штрафа на должностных лиц в размере от +двух тысяч до пяти тысяч рублей; на юридических лиц - от двадцати тысяч до +пятидесяти тысяч рублей. + + Статья 7.12. Несанкционированное вскрытие люков на колодцах и камерах, +решеток вентиляционных шахт (киосков), защитных оголовков, ворот, дверей, +запорных и защитных устройств подземных инженерных коммуникаций и сооружений + + Несанкционированное вскрытие люков на колодцах и камерах, решеток +вентиляционных шахт (киосков), защитных оголовков, ворот, дверей, запорных и +защитных устройств подземных инженерных коммуникаций и сооружений - + + влечет предупреждение или наложение административного штрафа на граждан в +размере от одной тысячи пятисот до двух тысяч пятисот рублей; на должностных лиц +- от трех тысяч до четырех тысяч рублей; на юридических лиц - от пятнадцати +тысяч до тридцати тысяч рублей. + + Примечание. 
В настоящем Кодексе под подземными инженерными коммуникациями +следует понимать коммуникации, которые расположены в подземном пространстве +города Москвы и включают в себя водосточные, водопроводные, канализационные +коллекторы, силовые кабели, кабели связи, контрольные кабели, канализацию, +теплопроводы, водопроводы, водостоки и другие инженерные коммуникации; под +подземными инженерными сооружениями следует понимать сооружения, которые +расположены в подземном пространстве города Москвы и включают в себя +коммуникационные коллекторы, трубопроводы, станции, бойлерные, вентиляционные, +калориферные шахты и камеры, колодцы, артезианские скважины, глубокий дренаж, +подземные части фонтанов, аварийные выходы тоннельно-транспортных развязок, +сооружения метрополитена, защитные сооружения гражданской обороны, а также +связанные с ними наземные сооружения, в том числе трансформаторные подстанции, +центральные тепловые пункты, ремонтно-эксплуатационные комплексы и постройки, +диспетчерские пункты. (В редакции Закона Москвы от 07.10.2009 г. № 42) + + Статья 7.13. Несанкционированное проникновение в подземные инженерные +коммуникации и сооружения, а также в трансформаторные и распределительные +подстанции и вводно-распределительные шкафы + + 1. Несанкционированное проникновение в подземные инженерные коммуникации и +сооружения, а также в трансформаторные и распределительные подстанции и +вводно-распределительные шкафы - + + влечет наложение административного штрафа на граждан в размере от одной +тысячи до одной тысячи пятисот рублей; на должностных лиц - от одной тысячи +пятисот до трех тысяч рублей; на юридических лиц - от десяти тысяч до пятнадцати +тысяч рублей. + + 2. 
Те же действия, сопряженные с несанкционированным вскрытием люков на +колодцах и камерах, решеток вентиляционных шахт (киосков), защитных оголовков, +ворот, дверей, запорных и защитных устройств подземных инженерных коммуникаций и +сооружений, - + + влекут наложение административного штрафа на граждан в размере от одной +тысячи пятисот до двух тысяч рублей; на должностных лиц - от двух тысяч до +четырех тысяч рублей; на юридических лиц - от пятнадцати тысяч до двадцати тысяч +рублей. + + Статья 7.14. Засорение подземных инженерных коммуникаций и сооружений + + 1. Сброс мусора, твердых производственных и бытовых отходов и иных +предметов в подземные инженерные коммуникации и сооружения, а равно сброс воды, +жидких производственных и бытовых отходов в подземные инженерные коммуникации и +сооружения, а также непринятие мер по удалению мусора, воды, твердых и жидких +производственных и бытовых отходов и иных предметов из подземных инженерных +коммуникаций и сооружений, не предназначенных для их размещения, - + + влечет предупреждение или наложение административного штрафа на граждан в +размере от одной тысячи до одной тысячи пятисот рублей; на должностных лиц - от +одной тысячи пятисот до двух тысяч пятисот рублей; на юридических лиц - от +пятнадцати тысяч до тридцати тысяч рублей. + + 2. Те же действия, приведшие к затруднению прохода и доступа к подземным +инженерным коммуникациям и оборудованию подземных инженерных сооружений, - + + влекут наложение административного штрафа на граждан в размере от одной +тысячи до одной тысячи пятисот рублей; на должностных лиц - от четырех тысяч до +пяти тысяч рублей; на юридических лиц - от двадцати тысяч до пятидесяти тысяч +рублей. + + 3. 
Действия, указанные в части 1 настоящей статьи, приведшие к аварийным +ситуациям в подземных инженерных коммуникациях и сооружениях, - + + влекут наложение административного штрафа на граждан в размере от одной +тысячи пятисот до двух тысяч рублей; на должностных лиц - от четырех тысяч до +пяти тысяч рублей; на юридических лиц - от сорока тысяч до ста тысяч рублей. + + Статья 7.15. Несанкционированный сброс воды, жидких производственных и +бытовых отходов в подземные инженерные коммуникации и сооружения + + Несанкционированный сброс воды, жидких производственных и бытовых отходов в +подземные инженерные коммуникации и сооружения - + + влечет наложение административного штрафа на граждан в размере от одной +тысячи до одной тысячи пятисот рублей; на должностных лиц - от двух тысяч до +двух тысяч пятисот рублей; на юридических лиц - от пятнадцати тысяч до +пятидесяти тысяч рублей. + + Статья 7.16. Внесение (сброс) опасных веществ и предметов в подземные +инженерные коммуникации и сооружения + + 1. Внесение (сброс) горючих, отравляющих, ядовитых, легковоспламеняющихся и +иных опасных веществ и предметов в подземные инженерные коммуникации и +сооружения, а также непринятие мер по удалению горючих, отравляющих, ядовитых, +легковоспламеняющихся и иных опасных веществ и предметов из подземных инженерных +коммуникаций и сооружений при условии, что внесение (сброс) указанных веществ и +предметов не было (не был) связано (связан) с эксплуатацией и ремонтом подземных +инженерных коммуникаций и сооружений, если указанные действия не повлекли тяжких +последствий, - + + влечет наложение административного штрафа на граждан в размере от одной +тысячи до одной тысячи пятисот рублей; на должностных лиц - от двух тысяч до +пяти тысяч рублей; на юридических лиц - от десяти тысяч до двадцати тысяч +рублей. + + 2. 
Те же действия, приведшие к аварийным ситуациям в подземных инженерных +коммуникациях и сооружениях, если это не повлекло тяжких последствий, - + + влекут наложение административного штрафа на граждан в размере от двух +тысяч до двух тысяч пятисот рублей; на должностных лиц - от четырех тысяч до +пяти тысяч рублей; на юридических лиц - от тридцати тысяч до пятидесяти тысяч +рублей. + + Статья 7.17. Повреждение подземных инженерных коммуникаций и сооружений по +неосторожности + + 1. Повреждение подземных инженерных коммуникаций и сооружений по +неосторожности, в том числе вследствие несоблюдения действующих норм и правил +согласования и проведения ремонтных, строительных и пусконаладочных работ, а +также непринятие мер по устранению причиненных по неосторожности повреждений +подземных инженерных коммуникаций и сооружений - + + влечет наложение административного штрафа на граждан в размере от одной +тысячи до двух тысяч рублей; на должностных лиц - от тридцати тысяч до сорока +тысяч рублей; на юридических лиц - от двухсот тысяч до двухсот пятидесяти тысяч +рублей. + + 2. Те же действия, приведшие к перерыву в эксплуатации подземных инженерных +коммуникаций и сооружений, - + + влекут наложение административного штрафа на граждан в размере от одной +тысячи пятисот до двух тысяч пятисот рублей; на должностных лиц - от тридцати +тысяч до сорока тысяч рублей; на юридических лиц - от трехсот тысяч до пятисот +тысяч рублей. + + Статья 7.18. Несанкционированное закрытие или открытие запорно-регулирующих +устройств на трубопроводах горячей, холодной воды, канализации и газопроводах + + 1. 
Несанкционированное закрытие или открытие запорно-регулирующих устройств +на трубопроводах горячей, холодной воды, канализации и газопроводах - + + влечет наложение административного штрафа на граждан в размере от одной +тысячи до одной тысячи пятисот рублей; на должностных лиц - от одной тысячи +пятисот до двух тысяч пятисот рублей; на юридических лиц - от десяти тысяч до +двадцати тысяч рублей. + + 2. Те же действия, приведшие к аварийным ситуациям, - + + влекут наложение административного штрафа на граждан в размере от одной +тысячи пятисот до двух тысяч пятисот рублей; на должностных лиц - от двух тысяч +пятисот до пяти тысяч рублей; на юридических лиц - от сорока тысяч до ста тысяч +рублей. + + Статья 7.19. Несанкционированный демонтаж оборудования, подземных +инженерных коммуникаций, наружных инженерных коммуникаций + + Несанкционированный демонтаж, то есть разборка на отдельные части +технологического, измерительного и иного оборудования в подземных инженерных +сооружениях, а также подземных и наружных инженерных коммуникаций, снятие их с +места установки без цели хищения, а равно непринятие мер по восстановлению +разобранного оборудования и коммуникаций и их монтажу на предусмотренных местах +установки - + + влечет наложение административного штрафа на граждан в размере от одной +тысячи пятисот до двух тысяч пятисот рублей; на должностных лиц - от двух тысяч +до пяти тысяч рублей; на юридических лиц - от пяти тысяч до тридцати тысяч +рублей. + + Статья 7.20. Несанкционированная прокладка кабелей в подземных инженерных +сооружениях + + Несанкционированная прокладка кабелей в подземных инженерных сооружениях, а +также непринятие мер по удалению несанкционированно проложенных кабелей из +подземных инженерных сооружений - + + влечет наложение административного штрафа на граждан в размере от одной +тысячи до одной тысячи пятисот рублей; на должностных лиц - от двух тысяч до +трех тысяч рублей; на юридических лиц - от пяти тысяч до десяти тысяч рублей. 
+ + Статья 7.21. Несанкционированное размещение оборудования и иных предметов в +подземных инженерных коммуникациях и сооружениях, на конструктивных элементах +наружных инженерных коммуникаций + + Несанкционированное размещение оборудования и иных предметов в подземных +инженерных коммуникациях и сооружениях, на конструктивных элементах наружных +инженерных коммуникаций, а также непринятие мер по удалению несанкционированно +размещенного оборудования и иных предметов из подземных инженерных коммуникаций +и сооружений - + + влечет наложение административного штрафа на граждан в размере от пятисот +до одной тысячи рублей; на должностных лиц - от двух тысяч до трех тысяч рублей; +на юридических лиц - от десяти тысяч до пятнадцати тысяч рублей. + + Статья 7.22. Несанкционированные установка временных сооружений, ведение +земляных работ в охранных зонах подземных инженерных коммуникаций и сооружений, +эстакад и открытых участков линий метрополитена, а также ходовой балки +Московской монорельсовой транспортной системы + + 1. Несанкционированные установка хозяйственных, бытовых сооружений, +гаражей, иных сооружений и конструкций, устройство автостоянок, свалок, +проездов, ведение земляных работ, посадка деревьев, кустарников, устройство +монументальных клумб, установка тентов-укрытий в охранных зонах подземных +инженерных коммуникаций и сооружений, эстакад и открытых участков линий +метрополитена, а также ходовой балки Московской монорельсовой транспортной +системы (далее - ММТС) - + + влечет наложение административного штрафа на граждан в размере от одной +тысячи до двух тысяч пятисот рублей; на должностных лиц - от трех тысяч до пяти +тысяч рублей; на юридических лиц - от пятнадцати тысяч до тридцати тысяч рублей. + + 2. 
Засыпка, асфальтирование камер, колодцев на подземных инженерных +коммуникациях - + + влечет наложение административного штрафа на граждан в размере от трехсот +до пятисот рублей; на должностных лиц - от двух тысяч до двух тысяч пятисот +рублей; на юридических лиц - от десяти тысяч до пятнадцати тысяч рублей. + + Статья 7.23. Несанкционированное ведение земляных работ в подземных +инженерных коммуникациях и сооружениях + + Несанкционированное ведение земляных работ в подземных инженерных +коммуникациях и сооружениях - + + влечет наложение административного штрафа на граждан в размере от одной +тысячи до двух тысяч рублей; на должностных лиц - от четырех тысяч до пяти тысяч +рублей; на юридических лиц - от тридцати тысяч до пятидесяти тысяч рублей. + + Статья 7.24. Несанкционированное нанесение на конструктивные элементы +подземных и наружных инженерных коммуникаций и сооружений надписей, размещение +на них информации + + (Утратила силу - Закон Москвы от 08.07.2015 г. № 46) + + Статья 7.25. Повреждение, несанкционированная ликвидация надписей, +указательных знаков и иной информации, необходимой для эксплуатации подземных и +наружных инженерных коммуникаций и сооружений + + Повреждение, несанкционированная ликвидация надписей, указательных знаков и +иной информации, необходимой для эксплуатации подземных и наружных инженерных +коммуникаций и сооружений, - + + влечет предупреждение или наложение административного штрафа на граждан в +размере от ста до трехсот рублей; на должностных лиц - от одной тысячи до двух +тысяч рублей; на юридических лиц - от двух тысяч до пяти тысяч рублей. + + Статья 7.26. Нарушение требований пожарной безопасности в подземных +инженерных коммуникациях и сооружениях + + (Утратила силу - Закон Москвы от 23.06.2010 г. № 28) + + Статья 7.27. 
Несоблюдение организациями лимитов энергопотребления + + Превышение государственными учреждениями и государственными унитарными +предприятиями города Москвы установленных для них в соответствии с правовыми +актами города Москвы лимитов потребления энергетических ресурсов - + + влечет наложение административного штрафа на должностных лиц в размере от +одной тысячи до пяти тысяч рублей; на юридических лиц - от пятидесяти тысяч до +ста тысяч рублей. + + Статья 7.28. Нарушение порядка оснащения организаций приборами учета +расхода энергетических ресурсов + + Нарушение установленного Правительством Москвы порядка оснащения +организаций приборами учета расхода энергетических ресурсов - + + влечет наложение административного штрафа на должностных лиц в размере от +одной тысячи до пяти тысяч рублей; на юридических лиц - от двадцати тысяч до +пятидесяти тысяч рублей. + + Статья 7.29. Нарушение порядка декларирования энергопотребления + + Нарушение государственными учреждениями и государственными унитарными +предприятиями города Москвы установленного в соответствии с законодательством +города Москвы порядка декларирования энергопотребления - + + влечет наложение административного штрафа на должностных лиц в размере от +одной тысячи до пяти тысяч рублей; на юридических лиц - от двадцати тысяч до +пятидесяти тысяч рублей. + + Статья 7.30. Несоблюдение установленного порядка обеспечения инженерной +безопасности зданий и сооружений в ходе проектирования и монтажа +структурированных систем мониторинга и управления инженерными системами зданий и +сооружений на территории города Москвы + + 1. 
Несоблюдение установленного порядка обеспечения инженерной безопасности +зданий и сооружений в ходе проектирования и монтажа структурированных систем +мониторинга и управления инженерными системами зданий и сооружений на территории +города Москвы - + + влечет наложение административного штрафа на должностных лиц в размере от +двадцати тысяч до тридцати тысяч рублей; на юридических лиц - от двухсот тысяч +до трехсот тысяч рублей. + + 2. Те же действия, совершенные в отношении существующих зданий и сооружений +при их реконструкции и капитальном ремонте, - + + влекут наложение административного штрафа на должностных лиц в размере от +тридцати тысяч до сорока тысяч рублей; на юридических лиц - от трехсот тысяч до +четырехсот тысяч рублей. + + 3. Непроведение оснащения зданий и сооружений на территории города Москвы +структурированными системами мониторинга и управления инженерными системами +зданий и сооружений при новом строительстве, реконструкции и капитальном ремонте +зданий и сооружений - + + влечет наложение административного штрафа на должностных лиц в размере от +тридцати тысяч до пятидесяти тысяч рублей; на юридических лиц - от четырехсот +тысяч до пятисот тысяч рублей. + + (Статья дополнена - Закон Москвы от 07.10.2009 г. № 42) + + Глава 8. Административные правонарушения в области благоустройства города + + Статья 8.1. Нарушение норм и правил проектирования комплексного +благоустройства + + Нарушение установленных Правительством Москвы норм и правил проектирования +комплексного благоустройства на территории города Москвы - + + влечет наложение административного штрафа на граждан в размере от трехсот +до пятисот рублей; на должностных лиц - от пятисот до одной тысячи рублей; на +юридических лиц - от одной тысячи до одной тысячи пятисот рублей. + + Статья 8.2. Нарушение внешнего вида и содержания зданий, ограждений, +строений, сооружений, водоемов и элементов оборудования объектов благоустройства + + 1. 
Нарушение установленных Правительством Москвы требований к внешнему виду +и содержанию зданий, кроме жилых домов, ограждений, строений, сооружений, +водоемов, - (В редакции Закона Москвы от 17.10.2012 г. № 50) + + влечет предупреждение или наложение административного штрафа на должностных +лиц в размере от пяти тысяч до десяти тысяч рублей; на юридических лиц - от +пятидесяти тысяч до ста тысяч рублей. (В редакции Закона Москвы от 17.10.2012 г. +№ 50) + + 2. Нарушение установленных Правительством Москвы требований к внешнему виду +и содержанию элементов оборудования объектов благоустройства, в том числе малых +архитектурных форм, садовопарковой мебели, бункеров, контейнеров, урн, - (В +редакции Закона Москвы от 17.10.2012 г. № 50) + + влечет предупреждение или наложение административного штрафа на должностных +лиц в размере от трех тысяч до пяти тысяч рублей; на юридических лиц - от +пятидесяти тысяч до ста тысяч рублей. (В редакции Закона Москвы от 17.10.2012 г. +№ 50) + + Статья 8.2.1. Нарушение норм и правил по текущему ремонту, санитарному и +техническому содержанию внутридворовых спортивных площадок + + Нарушение установленных нормативными правовыми актами города Москвы норм и +правил по текущему ремонту, санитарному и техническому содержанию внутридворовых +спортивных площадок, несоблюдение норм безопасности при эксплуатации +оборудования спортивных площадок - + + влечет предупреждение или наложение административного штрафа на должностных +лиц в размере от четырех тысяч до пяти тысяч рублей. + + (Статья дополнена - Закон Москвы от 07.10.2009 г. № 42) + + Статья 8.3. 
Самовольная установка (размещение) объектов + + Самовольная установка (размещение) контейнеров, гаражей, строительной и +дорожной техники, объектов мелкорозничной торговли и оказания услуг населению, +складских помещений, бытовок и иных объектов, не являющихся объектами +капитального строительства, объектов на подземных коммуникациях, а также +самовольное возведение произведений монументально-декоративного искусства, - (В +редакции Закона Москвы от 13.03.2013 г. № 11) + + влечет наложение административного штрафа на граждан в размере от трех +тысяч до пяти тысяч рублей; на должностных лиц - от восьми тысяч до десяти тысяч +рублей; на юридических лиц - от восьмидесяти тысяч до ста тысяч рублей. + + (Статья в редакции Закона Москвы от 07.10.2009 г. № 42) + + Статья 8.3.1. Нарушение требований безопасности при монтаже и эксплуатации +оборудования детских игровых площадок + + Нарушение требований безопасности при монтаже и эксплуатации оборудования +детских игровых площадок, в том числе отсутствие необходимой документации на +оборудование, несоблюдение норм безопасности при монтаже оборудования, +непроведение периодических осмотров и технического обслуживания оборудования и +его элементов в установленные сроки, невыполнение нормативов по информационному +обеспечению безопасности при монтаже и эксплуатации оборудования детских игровых +площадок - + + влечет наложение административного штрафа на должностных лиц в размере от +пяти тысяч до десяти тысяч рублей; на юридических лиц - от тридцати тысяч до +пятидесяти тысяч рублей. + + (Статья дополнена - Закон Москвы от 07.10.2009 г. № 42) + + Статья 8.4. Нарушение правил содержания и эксплуатации устройств наружного +освещения и контактной сети + + Нарушение установленных Правительством Москвы правил содержания и +эксплуатации устройств наружного освещения и контактной сети территории города +Москвы - (В редакции Закона Москвы от 07.10.2009 г. 
№ 42) + + влечет предупреждение или наложение административного штрафа на должностных +лиц в размере от пятисот до одной тысячи пятисот рублей; на юридических лиц - от +одной тысячи до пяти тысяч рублей. + + Статья 8.5. Повреждение устройств и конструктивных элементов наружного +освещения и контактной сети + + Совершенное по неосторожности повреждение устройств и конструктивных +элементов наружного освещения и контактной сети территории города Москвы, +приводящее к разрушению этих устройств, несвоевременный демонтаж и вывоз +поврежденных устройств наружного освещения или демонтаж без соответствующего +разрешения - (В редакции Закона Москвы от 07.10.2009 г. № 42) + + влечет наложение административного штрафа на граждан в размере от пятисот +до одной тысячи рублей; на должностных лиц - от двух тысяч до трех тысяч рублей; +на юридических лиц - от пятнадцати тысяч до тридцати тысяч рублей. + + Статья 8.6. Нарушение требований к внешнему виду и содержанию рекламных +конструкций, а также к размещению, внешнему виду и содержанию объектов +праздничного и тематического оформления + + 1. Нарушение установленных Правительством Москвы требований к внешнему виду +рекламных конструкций, в том числе по соответствию типам (видам) и размерам, - + + влечет наложение административного штрафа на граждан в размере пяти тысяч +рублей; на должностных лиц - от тридцати тысяч до пятидесяти тысяч рублей; на +юридических лиц - от пятисот тысяч до одного миллиона рублей. + + 2. Нарушение установленных Правительством Москвы требований к содержанию +рекламных конструкций - + + влечет предупреждение или наложение административного штрафа на граждан в +размере от одной тысячи до двух тысяч рублей; на должностных лиц - от двадцати +тысяч до тридцати тысяч рублей; на юридических лиц - от пятидесяти тысяч до +семидесяти тысяч рублей. + + 3. 
Нарушение установленных Правительством Москвы требований к размещению, +внешнему виду и содержанию объектов праздничного и тематического оформления - + + влечет предупреждение или наложение административного штрафа на граждан в +размере от пятисот до одной тысячи рублей; на должностных лиц - от десяти тысяч +до двадцати тысяч рублей; на юридических лиц - от десяти тысяч до пятидесяти +тысяч рублей. + + (Статья в редакции Закона Москвы от 07.05.2014 г. № 24) + + Статья 8.6.1. Нарушение требований к размещению, содержанию и внешнему виду +информационных конструкций + + 1. Нарушение установленных Правительством Москвы требований к внешнему виду +и содержанию информационных конструкций, в том числе к расположению букв на +информационном поле, - + + влечет предупреждение или наложение административного штрафа на граждан в +размере от пятисот до одной тысячи рублей; на должностных лиц - от десяти тысяч +до двадцати тысяч рублей; на юридических лиц - от десяти тысяч до пятидесяти +тысяч рублей. + + 2. 
Нарушение установленных Правительством Москвы требований к размещению +информационных конструкций, в том числе нарушение требований: + + к геометрическим параметрам информационных конструкций; + + о запрете полного перекрытия (закрытия) оконных и дверных проемов, а также +витражей и витрин; + + о запрете размещения вывесок в оконных проемах, к окраске и покрытию +декоративными пленками поверхности остекления витрин, замене остекления витрин +световыми коробами; + + о запрете размещения вывесок в границах жилых помещений, в том числе на +кровлях, лоджиях и балконах, архитектурных деталях фасадов объектов (в том числе +на колоннах, пилястрах, орнаментах, лепнине); + + о перекрытии (закрытии) указателей наименований улиц и номеров домов, +размещении консольных вывесок на расстоянии менее 10 метров друг от друга, а +также о размещении одной консольной вывески над другой; + + о запрете размещения вывесок выше линии второго этажа (линии перекрытий +между первым и вторым этажами) при размещении на внешних поверхностях +многоквартирных домов; + + об ограничении допустимого количества вывесок, - + + влечет наложение административного штрафа на граждан в размере пяти тысяч +рублей; на должностных лиц - от тридцати тысяч до пятидесяти тысяч рублей; на +юридических лиц - от двухсот тысяч до пятисот тысяч рублей. + + (Статья дополнена - Закон Москвы от 07.05.2014 г. № 24) + + Статья 8.7. Нарушение правил установки и содержания указателей с +наименованиями улиц и номерами домов, флагодержателей на зданиях и сооружениях + + 1. Нарушение утвержденных Правительством Москвы правил установки и +содержания указателей с наименованиями улиц и номерами домов, флагодержателей на +зданиях и сооружениях - + + влечет наложение административного штрафа на должностных лиц в размере от +трех тысяч до пяти тысяч рублей; на юридических лиц - от десяти тысяч до +тридцати тысяч рублей. + + 2. 
Воспрепятствование установке указателей с наименованиями улиц и номерами +домов, а равно их повреждение или уничтожение - + + влечет наложение административного штрафа на граждан в размере от двух +тысяч до четырех тысяч рублей; на должностных лиц - от пяти тысяч до семи тысяч +рублей; на юридических лиц - от десяти тысяч до тридцати тысяч рублей. + + 3. Размещение транспортных средств на детских площадках - + + влечет предупреждение или наложение административного штрафа на граждан в +размере от трех тысяч до пяти тысяч рублей; на должностных лиц - от пятнадцати +тысяч до двадцати тысяч рублей; на юридических лиц - от ста тысяч до двухсот +тысяч рублей. + + (Часть дополнена - Закон Москвы от 13.03.2013 г. № 11) + + (Статья в редакции Закона Москвы от 12.12.2012 г. № 65) + + Статья 8.8. Загрязнение территории, связанное с эксплуатацией и ремонтом +транспортных средств + + 1. Загрязнение территории города Москвы, связанное с эксплуатацией и +ремонтом транспортных средств, мойка транспортных средств вне специально +отведенных мест, стоянка на проезжей части дворовых территорий, препятствующая +механизированной уборке и вывозу бытовых отходов, а также размещение +разукомплектованных транспортных средств независимо от места их расположения, +кроме специально отведенных для этого мест, - (В редакции законов Москвы от +07.10.2009 г. № 42; от 13.03.2013 г. № 11) + + влечет предупреждение или наложение административного штрафа на граждан в +размере от одной тысячи до трех тысяч рублей; на должностных лиц - от пяти тысяч +до десяти тысяч рублей; на юридических лиц - от десяти тысяч до двадцати тысяч +рублей. (В редакции Закона Москвы от 07.10.2009 г. № 42) + + 2. 
Загрязнение, загромождение территории города Москвы, связанное с +самовольной парковкой (отстоем) грузового автотранспорта в жилой зоне и на +дворовых территориях, кроме специально отведенных для этого мест, - + + влечет предупреждение или наложение административного штрафа на граждан в +размере от одной тысячи до трех тысяч рублей; на должностных лиц - от пяти тысяч +до десяти тысяч рублей; на юридических лиц - от десяти тысяч до двадцати тысяч +рублей. + + (Часть дополнена - Закон Москвы от 07.10.2009 г. № 42) + + Статья 8.9. Самовольный сброс или сжигание мусора + + (Утратила силу - Закон Москвы от 12.12.2012 г. № 65) + + Статья 8.10. Нарушение правил санитарного содержания территорий, +организации уборки и обеспечения чистоты и порядка в городе Москве + + 1. Нарушение установленных Правительством Москвы правил санитарного +содержания территорий, организации уборки и обеспечения чистоты и порядка в +городе Москвы, за исключением случаев, предусмотренных статьями 4.8 и 8.16 +настоящего Кодекса, - (В редакции законов Москвы от 13.03.2013 г. № 11; от +13.05.2015 г. № 26) + + влечет наложение административного штрафа на граждан в размере от одной +тысячи до трех тысяч рублей; на должностных лиц - от пяти тысяч до десяти тысяч +рублей; на юридических лиц - от пятидесяти тысяч до двухсот тысяч рублей. (В +редакции Закона Москвы от 17.10.2012 г. № 50) + + 2. (Утратила силу - Закон Москвы от 13.03.2013 г. № 11) + + 3. Непринятие мер по восстановлению утраченного (уничтоженного) травяного +покрова газона - + + влечет наложение административного штрафа на должностных лиц в размере от +сорока тысяч до пятидесяти тысяч рублей; на юридических лиц - от пятидесяти +тысяч до ста тысяч рублей. + + (Часть дополнена - Закон Москвы от 13.05.2015 г. № 26) + + 4. 
Нарушение порядка выполнения работ по цветочному оформлению города +Москвы, выразившееся в несоответствии площади цветника площади, утвержденной +паспортом цветника, несоблюдении норм высадки рассады, а также нарушение +требований по сбору мусора, поливу, прополке, замене увядшей или утраченной +рассады - + + влечет наложение административного штрафа на должностных лиц в размере +пятидесяти тысяч рублей; на юридических лиц - ста тысяч рублей. + + (Часть дополнена - Закон Москвы от 13.05.2015 г. № 26) + + Статья 8.11. Подтопление городских улиц + + Подтопление городских улиц вследствие утечки воды, свободного сброса воды +на территорию города Москвы, неисправности водоприемных устройств либо +вследствие нарушения правил их обслуживания - + + влечет наложение административного штрафа на должностных лиц в размере от +двух тысяч до трех тысяч рублей; на юридических лиц - от трех тысяч до пяти +тысяч рублей. + + Статья 8.12. Непринятие мер по очистке крыш от снега и льда + + 1. Непринятие собственниками и иными законными владельцами зданий и +сооружений, кроме жилых домов и объектов недвижимого имущества, находящихся в +государственной или муниципальной собственности, мер по очистке крыш от снега и +удалению наростов льда на карнизах, крышах и водостоках - + + влечет наложение административного штрафа на граждан в размере от трех +тысяч до пяти тысяч рублей; на должностных лиц - от десяти тысяч до двадцати +тысяч рублей; на юридических лиц - от пятидесяти тысяч до двухсот тысяч рублей. +(В редакции Закона Москвы от 22.05.2019 № 20) + + 2. 
Непринятие законными владельцами зданий и сооружений, находящихся в +государственной или муниципальной собственности, за исключением особых объектов +нежилого фонда города Москвы, мер по очистке крыш от снега и удалению наростов +льда на карнизах, крышах и водостоках - + + влечет наложение административного штрафа на граждан в размере от трех +тысяч до пяти тысяч рублей; на должностных лиц - от десяти тысяч до двадцати +тысяч рублей; на юридических лиц - от пятидесяти тысяч до двухсот тысяч рублей. +(В редакции Закона Москвы от 22.05.2019 № 20) + + 3. Непринятие законными владельцами особых объектов нежилого фонда города +Москвы мер по очистке крыш от снега и удалению наростов льда на карнизах, крышах +и водостоках - + + влечет наложение административного штрафа на должностных лиц в размере от +пятнадцати тысяч до тридцати тысяч рублей; на юридических лиц - от пятидесяти +тысяч до двухсот тысяч рублей. (В редакции Закона Москвы от 22.05.2019 № 20) + + (Статья в редакции Закона Москвы от 10.12.2008 г. № 64) + + Статья 8.13. Несанкционированное размещение информации на объектах + + 1. Размещение объявлений и иных информационных материалов вне специально +отведенных для этого мест, за исключением случаев, предусмотренных частью 5 +настоящей статьи и статьей 10.8.1 настоящего Кодекса, - (В редакции Закона +Москвы от 08.07.2015 г. № 46) + + влечет предупреждение или наложение административного штрафа на граждан в +размере от одной тысячи до пяти тысяч рублей; на должностных лиц - от двадцати +пяти тысяч до пятидесяти тысяч рублей; на юридических лиц - от пятидесяти тысяч +до ста тысяч рублей. (В редакции Закона Москвы от 24.04.2013 г. № 19) + + 2. 
Несанкционированное нанесение надписей, изображений на конструктивные +элементы подземных и наземных инженерных коммуникаций, зданий и сооружений, в +том числе на таксофонное оборудование, а равно размещение на них иной +информации, если это не повлекло повреждение или уничтожение имущества, за +исключением случаев, предусмотренных частью 5 настоящей статьи, - (В редакции +Закона Москвы от 08.07.2015 г. № 46) + + влечет наложение административного штрафа на граждан в размере от пятисот +до двух тысяч пятисот рублей; на должностных лиц - от пяти тысяч до семи тысяч +пятисот рублей; на юридических лиц - от семи тысяч пятисот до пятнадцати тысяч +рублей. (В редакции Закона Москвы от 24.04.2013 г. № 19) + + 3. Невыполнение работ по удалению самовольно размещенных рекламных и иных +объявлений, надписей и изображений с внешних поверхностей объектов (элементов) +благоустройства - + + влечет предупреждение или наложение административного штрафа на должностных +лиц в размере от трех тысяч до пяти тысяч рублей; на юридических лиц - от +пятидесяти тысяч до ста тысяч рублей. + + (Часть дополнена - Закон Москвы от 13.03.2013 г. № 11) + + 4. Нанесение на покрытия дорог (улично-дорожной сети), тротуаров, +пешеходных зон, велосипедных и пешеходных дорожек надписей и изображений, +выполненных стойкими материалами, кроме относящихся к порядку их эксплуатации и +наносимых в рамках исполнения государственного или муниципального контракта - + + влечет наложение административного штрафа на граждан в размере от пятисот +до двух тысяч пятисот рублей; на должностных лиц - от пяти тысяч до семи тысяч +пятисот рублей; на юридических лиц - от семи тысяч пятисот до пятнадцати тысяч +рублей. + + 5. 
Нанесение надписей, изображений, размещение информационных и рекламных +материалов на объекты инфраструктуры городского транспорта общего пользования, +кроме относящихся к порядку их эксплуатации и (или) наносимых (размещаемых) в +рамках исполнения государственного контракта или иного договора, заключенного с +уполномоченным органом исполнительной власти города Москвы, государственным +унитарным предприятием города Москвы или государственным учреждением города +Москвы, - + + влечет наложение административного штрафа на граждан в размере от трех +тысяч до пяти тысяч рублей; на должностных лиц - от тридцати пяти тысяч до +пятидесяти тысяч рублей; на юридических лиц - от пятидесяти тысяч до ста +пятидесяти тысяч рублей. + + (Часть дополнена - Закон Москвы от 08.07.2015 г. № 46) + + Примечание. Под объектами инфраструктуры городского транспорта общего +пользования в настоящей статье следует понимать остановочные пункты наземного +городского транспорта общего пользования, а также размещенные на них сооружения, +конструкции, пункты и автоматы для продажи билетов для проезда на наземном +городском транспорте общего пользования, метрополитене и ММТС, дорожные знаки, +обозначающие места остановок трамвая, автобуса и (или) троллейбуса, +информационные табло и остановочные указатели, элементы конструкций трамвайных +путей, конструкции и сооружения метрополитена и ММТС, трамвайных депо, +троллейбусных, автобусных парков, стрелочные посты, опоры контактной сети +троллейбуса и трамвая. (Дополнен - Закон Москвы от 08.07.2015 г. № 46) + + (Часть дополнена - Закон Москвы от 24.04.2013 г. № 19) + + Статья 8.14. Неуплата за размещение транспортного средства на платной +городской парковке + + 1. (Утратила силу - Закон Москвы от 18.06.2014 г. № 31) + + 2. Неуплата за размещение транспортного средства на платной городской +парковке - (В редакции Закона Москвы от 18.06.2014 г. 
№ 31) + + влечет наложение административного штрафа на граждан, должностных лиц и +юридических лиц в размере пяти тысяч рублей. (В редакции Закона Москвы от +26.12.2018 № 40) + + 3. (Утратила силу - Закон Москвы от 18.06.2014 г. № 31) + + Примечание. Под городской парковкой в настоящей статье следует понимать +объект благоустройства города Москвы, представляющий собой специально +обозначенное, обустроенное и оборудованное место, являющееся частью +автомобильной дороги и (или) примыкающее к проезжей части и (или) тротуару, +обочине, эстакаде или мосту либо являющееся частью подэстакадных или подмостовых +пространств, площадей и иных объектов улично-дорожной сети, находящихся в +собственности города Москвы, и предназначенное для организованной стоянки +транспортных средств на платной основе или без взимания платы по решению +Правительства Москвы. + + (Статья дополнена - Закон Москвы от 24.10.2012 г. № 51) + + Статья 8.15. Сброс снега, собранного при очистке объектов благоустройства, +с грузовых транспортных средств вне специально отведенных мест + + Сброс снега, собранного при очистке объектов благоустройства, с грузовых +транспортных средств вне специально отведенных мест - + + влечет наложение административного штрафа на должностных лиц в размере +сорока тысяч рублей; на юридических лиц - трехсот тысяч рублей. + + (Статья дополнена - Закон Москвы от 13.03.2013 г. № 11) + + Статья 8.16. Нарушение правил обустройства строительных площадок пунктами +мойки колес автомобильного транспорта либо установками для сухой очистки колес +сжатым воздухом в зимний период + + 1. Невыполнение требований по обустройству строительных площадок пунктами +мойки колес автомобильного транспорта либо установками для сухой очистки колес +сжатым воздухом в зимний период - + + влечет наложение административного штрафа на граждан в размере от трех +тысяч до четырех тысяч рублей; на должностных лиц - пятидесяти тысяч рублей; на +юридических лиц - трехсот тысяч рублей. + + 2. 
Неиспользование на строительных площадках пунктов мойки колес +автомобильного транспорта либо установок для сухой очистки колес сжатым воздухом +в зимний период - + + влечет наложение административного штрафа на граждан в размере от трех +тысяч до пяти тысяч рублей; на должностных лиц - пятидесяти тысяч рублей; на +юридических лиц - трехсот тысяч рублей. + + 3. Вынос грунта и грязи автотранспортом, выезжающим со строительных +площадок либо с территорий предприятий, производящих строительные материалы, на +территорию города Москвы - + + влечет наложение административного штрафа на должностных лиц в размере от +двадцати тысяч до сорока тысяч рублей; на юридических лиц - от трехсот тысяч до +пятисот тысяч рублей. + + (Статья дополнена - Закон Москвы от 13.03.2013 г. № 11) + + Статья 8.17. Нарушение требований по транспортированию и (или) размещению +отходов строительства и сноса + + 1. Транспортирование и (или) размещение отходов строительства и сноса с +нарушением технологического регламента процесса обращения с отходами +строительства и сноса, зарегистрированного в установленном порядке, и (или) +условий, содержащихся в разрешении на их перемещение, - + + влечет наложение административного штрафа на граждан в размере от двух +тысяч пятисот до трех тысяч пятисот рублей; на должностных лиц - двадцати тысяч +рублей; на юридических лиц - ста пятидесяти тысяч рублей. + + 2. Транспортирование и (или) размещение отходов строительства и сноса без +разрешения на их перемещение - + + влечет наложение административного штрафа на граждан в размере от двух +тысяч пятисот до трех тысяч пятисот рублей; на должностных лиц - тридцати тысяч +рублей; на юридических лиц - двухсот тысяч рублей. + + (Статья дополнена - Закон Москвы от 13.03.2013 г. № 11) + + Статья 8.18. Нарушение правил производства земляных работ и работ по +организации площадок для проведения отдельных работ в сфере благоустройства + + 1. 
Нарушение установленных Правительством Москвы правил производства +земляных работ и работ по организации площадок для проведения отдельных работ в +сфере благоустройства, в том числе отсутствие утвержденной проектной +документации или необходимых согласований при проведении указанных работ, +несвоевременное восстановление благоустройства территории после их завершения, +непринятие мер по ликвидации провала асфальта (грунта), связанного с +производством разрытий, а также несоблюдение установленных требований к +обустройству и содержанию строительных площадок - + + влечет предупреждение или наложение административного штрафа на граждан в +размере от трех тысяч до пяти тысяч рублей; на должностных лиц - от двадцати +тысяч до тридцати пяти тысяч рублей; на юридических лиц - от трехсот тысяч до +пятисот тысяч рублей. + + 2. Производство земляных работ и работ по организации площадок для +проведения отдельных работ в сфере благоустройства без соответствующего ордера +или после окончания срока его действия, самовольные строительство, снос, +реконструкция или капитальный ремонт зданий, сооружений и коммуникаций, а также +нарушение сроков выполнения работ в соответствии с ордером - + + влечет наложение административного штрафа на граждан в размере от трех +тысяч до пяти тысяч рублей; на должностных лиц - от тридцати тысяч до пятидесяти +тысяч рублей; на юридических лиц - от семисот тысяч до одного миллиона рублей. + + (Статья дополнена - Закон Москвы от 13.03.2013 г. № 11) + + Статья 8.19. Повреждение объектов (элементов) благоустройства, инженерных +коммуникаций и сооружений + + 1. 
Повреждение объектов (элементов) благоустройства, инженерных +коммуникаций и сооружений, за исключением случаев, предусмотренных статьями +4.18, 4.19, 7.17, 8.5 настоящего Кодекса, - + + влечет наложение административного штрафа на граждан в размере от трех +тысяч до пяти тысяч рублей; на должностных лиц - от тридцати тысяч до сорока +тысяч рублей; на юридических лиц - от трехсот тысяч до пятисот тысяч рублей. + + 2. Действия, указанные в части 1 настоящей статьи, повлекшие уничтожение +объектов (элементов) благоустройства или перерыв в эксплуатации инженерных +коммуникаций и сооружений, - + + влекут наложение административного штрафа на граждан в размере от трех +тысяч до пяти тысяч рублей; на должностных лиц - от сорока тысяч до пятидесяти +тысяч рублей; на юридических лиц - от восьмисот тысяч до одного миллиона рублей. + + (Статья дополнена - Закон Москвы от 13.03.2013 г. № 11) + + Статья 8.20. Нарушение правил содержания и эксплуатации автомобильных дорог +(объектов улично-дорожной сети) и технических средств их обустройства + + 1. Нарушение правил содержания и эксплуатации автомобильных дорог (объектов +улично-дорожной сети) и технических средств их обустройства - + + влечет предупреждение или наложение административного штрафа на граждан в +размере от одной тысячи пятисот до двух тысяч рублей; на должностных лиц - от +пяти тысяч до десяти тысяч рублей; на юридических лиц - от тридцати тысяч до +пятидесяти тысяч рублей. + + 2. Неудовлетворительное содержание дорожных ограждений, дорожных знаков, +светофоров, отсутствие дорожной разметки или нарушение правил ее нанесения - + + влечет предупреждение или наложение административного штрафа на должностных +лиц в размере от двух тысяч до трех тысяч рублей; на юридических лиц - от +двадцати тысяч до тридцати тысяч рублей. + + (Статья дополнена - Закон Москвы от 13.03.2013 г. № 11) + + Статья 8.21. 
Устройство искусственных дорожных неровностей на проезжей +части и прилегающей к ней территории в нарушение порядка, установленного +Правительством Москвы + + Устройство искусственных дорожных неровностей на проезжей части и +прилегающей к ней территории в нарушение порядка, установленного Правительством +Москвы, - + + влечет наложение административного штрафа на граждан в размере пяти тысяч +рублей; на должностных лиц - от семи тысяч до десяти тысяч рублей; на +юридических лиц - от десяти тысяч до двадцати тысяч рублей. + + (Статья дополнена - Закон Москвы от 13.03.2013 г. № 11) + + Статья 8.22. Нарушение технологии ремонта дорожного покрытия + + 1. Нарушение технологии асфальтобетонных работ при ремонте дорожного +покрытия, а также использование асфальтобетонной смеси, не соответствующей +требованиям государственных стандартов, технических условий, - + + влечет наложение административного штрафа на должностных лиц в размере от +пяти тысяч до десяти тысяч рублей; на юридических лиц - от тридцати тысяч до +пятидесяти тысяч рублей. + + 2. Нарушение технологии укладки асфальтобетонного покрытия - + + влечет наложение административного штрафа на юридических лиц в размере от +ста тысяч до трехсот тысяч рублей. + + 3. Недостаточное уплотнение асфальтобетонного покрытия при его укладке - + + влечет наложение административного штрафа на юридических лиц в размере от +ста тысяч до трехсот тысяч рублей. + + (Статья дополнена - Закон Москвы от 13.03.2013 г. № 11) + + Статья 8.23. 
Невыполнение требований по приспособлению объектов (элементов) +благоустройства для беспрепятственного доступа инвалидов и иных маломобильных +граждан + + Невыполнение установленных нормативными правовыми актами города Москвы +требований по приспособлению объектов (элементов) благоустройства для +беспрепятственного доступа инвалидов и иных маломобильных граждан - + + влечет наложение административного штрафа на должностных лиц в размере от +десяти тысяч до двадцати тысяч рублей; на юридических лиц - от ста тысяч до +двухсот тысяч рублей. + + (Статья дополнена - Закон Москвы от 13.03.2013 г. № 11) + + Статья 8.24. Нарушение установленных Правительством Москвы ограничений +движения грузового автотранспорта в городе Москве + + (Утратила силу - Закон Москвы от 26.06.2013 г. № 36) + + Статья 8.25. Размещение транспортных средств на территории, занятой +зелеными насаждениями + + Размещение транспортных средств на газоне или иной территории, занятой +зелеными насаждениями, - + + влечет наложение административного штрафа на граждан в размере пяти тысяч +рублей; на должностных лиц - тридцати тысяч рублей; на юридических лиц - трехсот +тысяч рублей. + + (Статья дополнена - Закон Москвы от 13.05.2015 г. № 26) + + Статья 8.26. Нарушение требований к обустройству и эксплуатации сезонных +(летних) кафе + + Нарушение установленных Правительством Москвы требований к обустройству и +эксплуатации сезонных (летних) кафе - + + влечет наложение административного штрафа на должностных лиц в размере от +десяти тысяч до тридцати тысяч рублей; на юридических лиц - от пятидесяти тысяч +до ста пятидесяти тысяч рублей. + + (Статья дополнена - Закон Москвы от 24.06.2015 г. № 32) + + Глава 9. Административные правонарушения в области использования и +эксплуатации жилищного фонда и объектов нежилого фонда + + Статья 9.1. 
Нарушение нормативов по эксплуатации жилищного фонда + + Нарушение установленных Правительством Москвы нормативов по эксплуатации +жилищного фонда - + + влечет наложение административного штрафа на граждан в размере от одной +тысячи до двух тысяч рублей; на должностных лиц - от пяти тысяч до пятидесяти +тысяч рублей; на юридических лиц - от пятнадцати тысяч до ста тысяч рублей. (В +редакции Закона Москвы от 17.10.2012 г. № 50) + + Статья 9.2. Нарушение порядка использования подвалов, чердаков и иных +нежилых помещений в многоквартирных домах + + (Утратила силу - Закон Москвы от 12.12.2012 г. № 65) + + Статья 9.3. Нарушение порядка учета объектов нежилого фонда + + (Утратила силу - Закон Москвы от 24.06.2015 г. № 32) + + Статья 9.3.1. Нарушение требований о паспортизации жилых помещений +(квартир) в городе Москве + + Нарушение поставщиками паспортной информации, изготовителями технического +паспорта жилого помещения (квартиры) и потребителями паспортной информации +требований, установленных Законом города Москвы от 3 ноября 2004 года № 66 "О +паспортизации жилых помещений (квартир) в городе Москве", - + + влечет наложение административного штрафа на граждан - от пятисот до тысячи +рублей, на должностных лиц - от одной тысячи до двух тысяч рублей; на +юридических лиц - от десяти тысяч до двадцати тысяч рублей. + + (Статья дополнена - Закон Москвы от 07.10.2009 г. № 42) + + Статья 9.4. Нарушение порядка занятия и (или) использования объектов +нежилого фонда + + 1. Нарушение установленного Правительством Москвы порядка занятия и (или) +использования объектов нежилого фонда, находящихся в собственности города +Москвы, за исключением случаев, предусмотренных статьей 9.7 настоящего Кодекса, +- + + влечет наложение административного штрафа на граждан в размере трех тысяч +рублей; на должностных лиц - от десяти тысяч до тридцати тысяч рублей; на +юридических лиц - от ста тысяч до двухсот тысяч рублей. + + 2. 
Действия, указанные в части 1 настоящей статьи, совершенные в отношении +объектов нежилого фонда, являющихся объектами культурного наследия, - + + влекут наложение административного штрафа на граждан в размере пяти тысяч +рублей; на должностных лиц - от двадцати тысяч до пятидесяти тысяч рублей; на +юридических лиц - от двухсот тысяч до трехсот тысяч рублей. + + 3. Неустранение ранее выявленного административного правонарушения, +предусмотренного частями 1 и 2 настоящей статьи, в течение 90 дней с момента +вступления в законную силу постановления о назначении административного +наказания за данное правонарушение, а равно повторное совершение +административного правонарушения, предусмотренного частями 1 и 2 настоящей +статьи, - (В редакции Закона Москвы от 29.04.2019 № 15) + + влечет наложение административного штрафа на граждан в размере пяти тысяч +рублей; на должностных лиц - от тридцати тысяч до пятидесяти тысяч рублей; на +юридических лиц - от трехсот тысяч до пятисот тысяч рублей. + + (Статья в редакции Закона Москвы от 24.06.2015 г. № 32) + + Статья 9.5. Использование объектов нежилого фонда без надлежаще оформленных +правоустанавливающих документов + + (Утратила силу - Закон Москвы от 21.01.2015 г. № 1) + + Статья 9.6. Нарушение порядка согласования распоряжения объектами нежилого +фонда + + 1. Нарушение установленного Правительством Москвы порядка согласования +распоряжения объектами нежилого фонда, находящимися в собственности города +Москвы, переданными в безвозмездное пользование, хозяйственное ведение, +оперативное управление, - + + влечет наложение административного штрафа на граждан в размере от одной +тысячи до трех тысяч рублей; на должностных лиц - от десяти тысяч до тридцати +тысяч рублей; на юридических лиц - от пятидесяти тысяч до ста пятидесяти тысяч +рублей. + + 2. 
Нарушение установленного Правительством Москвы порядка согласования +распоряжения объектами нежилого фонда, находящимися в собственности города +Москвы и переданными в аренду (в том числе порядка согласования передачи объекта +аренды полностью или частично в субаренду, в качестве предмета залога, паевого +взноса, вклада в уставный (складочный) капитал юридических лиц, передачи +полностью или частично прав по договору аренды третьим лицам), - + + влечет наложение административного штрафа на граждан в размере трех тысяч +рублей; на должностных лиц - от десяти тысяч до тридцати тысяч рублей; на +юридических лиц - от ста тысяч до трехсот тысяч рублей. + + 3. Действия, указанные в частях 1 и 2 настоящей статьи, совершенные в +отношении объектов нежилого фонда, являющихся объектами культурного наследия, - + + влекут наложение административного штрафа на граждан в размере от двух +тысяч до пяти тысяч рублей; на должностных лиц - от двадцати тысяч до пятидесяти +тысяч рублей; на юридических лиц - от ста пятидесяти тысяч до трехсот пятидесяти +тысяч рублей. + + 4. Неустранение ранее выявленного административного правонарушения, +предусмотренного частями 1 - 3 настоящей статьи, в течение 60 дней с момента +вступления в законную силу постановления о назначении административного +наказания за данное правонарушение, а равно повторное совершение +административного правонарушения, предусмотренного частями 1 - 3 настоящей +статьи, - (В редакции Закона Москвы от 29.04.2019 № 15) + + влечет наложение административного штрафа на граждан в размере от двух +тысяч до пяти тысяч рублей; на должностных лиц - от двадцати тысяч до пятидесяти +тысяч рублей; на юридических лиц - от трехсот тысяч до пятисот тысяч рублей. + + (Статья в редакции Закона Москвы от 24.06.2015 г. № 32) + + Статья 9.7. Нарушение порядка изменения целевого назначения объектов +нежилого фонда + + 1. 
Использование объектов нежилого фонда, находящихся в собственности +города Москвы, с нарушением установленного порядка изменения целевого назначения +нежилого помещения (его части), без наличия соответствующего разрешения - (В +редакции законов Москвы от 10.03.2010 г. № 6; от 24.06.2015 г. № 32) + + влечет наложение административного штрафа на граждан в размере от одной +тысячи до трех тысяч рублей; на должностных лиц - от десяти тысяч до тридцати +тысяч рублей; на юридических лиц - от пятидесяти тысяч до ста пятидесяти тысяч +рублей. (В редакции Закона Москвы от 24.06.2015 г. № 32) + + 1.1. Те же действия, совершенные в отношении объектов нежилого фонда, +являющихся объектами культурного наследия, - + + влекут наложение административного штрафа на граждан в размере от трех +тысяч до пяти тысяч рублей; на должностных лиц - от двадцати тысяч до пятидесяти +тысяч рублей; на юридических лиц - от двухсот тысяч до пятисот тысяч рублей. + + (Часть дополнена - Закон Москвы от 24.06.2015 г. № 32) + + 2. Неустранение ранее выявленного административного правонарушения, +предусмотренного частями 1 и 1.1 настоящей статьи, в течение 80 дней с момента +вступления в законную силу постановления о назначении административного +наказания за данное правонарушение, а равно повторное совершение +административного правонарушения, предусмотренного частями 1 и 1.1 настоящей +статьи, - (В редакции Закона Москвы от 29.04.2019 № 15) + + влечет наложение административного штрафа на граждан в размере от двух +тысяч до пяти тысяч рублей; на должностных лиц - от тридцати тысяч до пятидесяти +тысяч рублей; на юридических лиц - от трехсот тысяч до пятисот тысяч рублей. (В +редакции Закона Москвы от 24.06.2015 г. № 32) + + (Часть дополнена - Закон Москвы от 10.03.2010 г. № 6) + + Статья 9.8. Переустройство объектов нежилого фонда без оформления +разрешительных документов + + 1. 
Переустройство (перепланировка, переоборудование) объектов нежилого +фонда, находящихся в собственности города Москвы, без оформления разрешительных +документов на переустройство (перепланировку, переоборудование) объекта нежилого +фонда, а также возведение пристроек (надстроек) без постановки на учет и +оформления прав владения - (В редакции Закона Москвы от 24.06.2015 г. № 32) + + влечет наложение административного штрафа на граждан в размере от одной +тысячи до трех тысяч рублей; на должностных лиц - от десяти тысяч до тридцати +тысяч рублей; на юридических лиц - от пятидесяти тысяч до ста пятидесяти тысяч +рублей. (В редакции Закона Москвы от 24.06.2015 г. № 32) + + 2. Те же действия, повлекшие разрушение объекта нежилого фонда, - + + влекут наложение административного штрафа на граждан в размере от трех +тысяч до пяти тысяч рублей; на должностных лиц - от тридцати тысяч до пятидесяти +тысяч рублей; на юридических лиц - от ста тысяч до двухсот тысяч рублей. (В +редакции Закона Москвы от 24.06.2015 г. № 32) + + 3. Неустранение ранее выявленного административного правонарушения, +предусмотренного частями 1 и 2 настоящей статьи, в течение 40 дней с момента +вступления в законную силу постановления о назначении административного +наказания за данное правонарушение, а равно повторное совершение +административного правонарушения, предусмотренного частями 1 и 2 настоящей +статьи, - (В редакции Закона Москвы от 29.04.2019 № 15) + + влечет наложение административного штрафа на граждан в размере от двух +тысяч до пяти тысяч рублей; на должностных лиц - от двадцати тысяч до пятидесяти +тысяч рублей; на юридических лиц - от двухсот тысяч до пятисот тысяч рублей. (В +редакции Закона Москвы от 24.06.2015 г. № 32) + + (Часть дополнена - Закон Москвы от 10.03.2010 г. № 6) + + Статья 9.9. Нарушение порядка распоряжения объектом нежилого фонда + + (Утратила силу - Закон Москвы от 21.01.2015 г. № 1) + + Статья 9.10. 
Нарушение порядка распоряжения и использования объекта +нежилого фонда при осуществлении инвестиционной деятельности + + 1. Использование без получения разрешения специально уполномоченного органа +исполнительной власти города Москвы объекта нежилого фонда, переданного по +инвестиционному контракту, договору или соглашению для капитального ремонта или +реконструкции, в котором имеются доли города Москвы, для осуществления +деятельности, не связанной с реализацией (выполнением) инвестиционного +контракта, договора или соглашения, - + + влечет наложение административного штрафа на граждан в размере от двух +тысяч до пяти тысяч рублей; на должностных лиц - от десяти тысяч до тридцати +тысяч рублей; на юридических лиц - от ста тысяч до трехсот тысяч рублей. (В +редакции Закона Москвы от 24.06.2015 г. № 32) + + 2. Распоряжение объектом нежилого фонда, в котором имеются доли города +Москвы, переданным по инвестиционному контракту, договору или соглашению для +капитального ремонта или реконструкции либо созданным в результате нового +строительства, без получения разрешения специально уполномоченного органа +исполнительной власти города Москвы - + + влечет наложение административного штрафа на должностных лиц в размере от +десяти тысяч до тридцати тысяч рублей; на юридических лиц - от двухсот тысяч до +пятисот тысяч рублей. (В редакции Закона Москвы от 24.06.2015 г. № 32) + + 3. Использование объекта нежилого фонда, составляющего выделенную долю +города Москвы в общем имуществе, без получения разрешения специально +уполномоченного органа исполнительной власти города Москвы - + + влечет наложение административного штрафа на должностных лиц в размере от +десяти тысяч до тридцати тысяч рублей; на юридических лиц - от ста тысяч до +трехсот тысяч рублей. (В редакции Закона Москвы от 24.06.2015 г. № 32) + + 4. 
Неустранение ранее выявленного административного правонарушения, +предусмотренного частями 1 - 3 настоящей статьи, в течение 60 дней с момента +вступления в законную силу постановления о назначении административного +наказания за данное правонарушение, а равно повторное совершение +административного правонарушения, предусмотренного частями 1 - 3 настоящей +статьи, - (В редакции Закона Москвы от 29.04.2019 № 15) + + влечет наложение административного штрафа на граждан в размере от двух +тысяч до пяти тысяч рублей; на должностных лиц - от двадцати тысяч до пятидесяти +тысяч рублей; на юридических лиц - от двухсот тысяч до пятисот тысяч рублей. (В +редакции Закона Москвы от 24.06.2015 г. № 32) + + (Часть дополнена - Закон Москвы от 10.03.2010 г. № 6) + + Статья 9.11. Нарушение требований к содержанию особых объектов нежилого +фонда + + Нарушение установленных законодательством и иными правовыми актами города +Москвы требований к содержанию особых объектов нежилого фонда - + + влечет наложение административного штрафа на граждан в размере от тысячи до +одной тысячи пятисот рублей; на должностных лиц - от пяти тысяч до десяти тысяч +рублей; на юридических лиц - от двадцати пяти тысяч до пятидесяти тысяч рублей. +(В редакции Закона Москвы от 17.10.2012 г. № 50) + + Статья 9.12. 
Проведение работ по переустройству и (или) перепланировке +нежилого помещения в многоквартирном доме, не являющегося общим имуществом +собственников помещений в многоквартирном доме, без согласования уполномоченного +органа исполнительной власти города Москвы + + Проведение работ по переустройству и (или) перепланировке нежилого +помещения в многоквартирном доме, не являющегося общим имуществом собственников +помещений в многоквартирном доме, без согласования уполномоченного органа +исполнительной власти города Москвы или с нарушением условий такого +согласования, если его получение необходимо в соответствии с установленными +Правительством Москвы требованиями, за исключением случаев, установленных +статьей 9.8 настоящего Кодекса, - + + влечет наложение административного штрафа на граждан в размере от двух +тысяч до двух тысяч пятисот рублей; на должностных лиц - от пяти тысяч до +пятидесяти тысяч рублей; на юридических лиц - от трехсот тысяч до трехсот +пятидесяти тысяч рублей. + + (Статья дополнена - Закон Москвы от 26.06.2013 г. № 36) + + Глава 10. Административные правонарушения на транспорте + + Статья 10.1. Безбилетный проезд и провоз неоплаченного багажа в наземном +городском транспорте общего пользования + + 1. Безбилетный проезд в наземном городском транспорте общего пользования +(трамваях, троллейбусах, автобусах), за исключением случаев, предусмотренных +частью 1.1 настоящей статьи, - + + влечет наложение административного штрафа на граждан в размере одной тысячи +рублей. (В редакции законов Москвы от 01.06.2011 г. № 21; от 31.10.2012 г. № +54) + + 1.1. Неправомерное использование льготной персонифицированной карты для +проезда в наземном городском транспорте общего пользования (трамваях, +троллейбусах, автобусах) - + + влечет наложение административного штрафа на граждан в размере двух тысяч +пятисот рублей. + + (Часть дополнена - Закон Москвы от 31.10.2012 г. № 54) + + 2. 
Провоз в наземном городском транспорте общего пользования неоплаченного +багажа, сумма измерений которого по длине, ширине и высоте превышает 120 +сантиметров, ручной клади допустимых размеров свыше одного места, а также лыж (в +чехле) свыше одной пары - + + влечет наложение административного штрафа на граждан в размере одной тысячи +рублей за каждое место багажа или лыж. (В редакции Закона Москвы от 01.06.2011 +г. № 21) + + Примечание. Под льготной персонифицированной картой понимается электронная +карта (материальный носитель), содержащая зафиксированную на ней в визуальной и +(или) электронной (машиносчитываемой) формах информацию, удостоверяющую право +гражданина пользоваться на территории города Москвы мерами социальной поддержки, +в том числе в виде льгот по оплате проезда в наземном городском транспорте +общего пользования (трамваях, троллейбусах, автобусах), метрополитене и +ММТС. (Дополнено - Закон Москвы от 31.10.2012 г. № 54) + + Статья 10.2. Невыдача проездных билетов пассажирам + + Невыдача проездных билетов оплатившим проезд пассажирам наземного +городского транспорта общего пользования, автобусов пригородного сообщения, +маршрутных микроавтобусов водителем или иным лицом, ответственным за выдачу +проездных билетов, - + + влечет наложение административного штрафа на водителя или иное лицо, +ответственное за выдачу проездных билетов, в размере от трехсот до одной тысячи +рублей. + + Статья 10.3. Провоз опасных веществ и предметов либо запрещенного к провозу +багажа в наземном городском транспорте общего пользования + + (Утратила силу - Закон Москвы от 12.12.2012 г. № 65) + + Статья 10.4. Проезд в пачкающей одежде, провоз загрязняющих предметов и +вещей в наземном городском транспорте общего пользования + + (Утратила силу - Закон Москвы от 12.12.2012 г. № 65) + + Статья 10.5. Нарушение порядка использования механизмов и оборудования +наземных транспортных средств общего пользования + + 1. 
Приведение в действие механизмов для открытия дверей в наземных +городских транспортных средствах общего пользования, препятствование закрытию и +открытию дверей, кроме необходимости предотвращения несчастных случаев, +нахождение пассажиров в кабине водителя - + + влечет наложение административного штрафа на граждан в размере ста рублей. + + 2. Использование сидений в наземных городских транспортных средствах общего +пользования не по их прямому назначению - + + влечет предупреждение или наложение административного штрафа на граждан в +размере ста рублей. + + Статья 10.6. Нарушение установленных Правительством Москвы требований к +организации транспортного обслуживания населения на маршрутах регулярных +перевозок пассажиров и багажа автомобильным транспортом и городским наземным +электрическим транспортом + + (Утратила силу - Закон Москвы от 23.11.2016 г. № 39) + + Статья 10.7. Эксплуатация несогласованных городских, пригородных, +междугородних и международных автобусных маршрутов + + (Утратила силу - Закон Москвы от 08.07.2015 г. № 43) + + Статья 10.8. Нарушение требований к оборудованию наземных транспортных +средств общего пользования отличительными знаками + + Осуществление перевозки пассажиров по маршрутам наземного городского +транспорта общего пользования с нарушением требований к оборудованию +транспортного средства отличительными знаками и информационному обеспечению +пассажиров транспортного средства - + + влечет наложение административного штрафа на водителя или на должностных +лиц в размере от пятисот до одной тысячи рублей; на юридических лиц - от пятисот +до одной тысячи пятисот рублей. + + Статья 10.8.1. 
Несанкционированное нанесение надписей, изображений, +размещение информационных и рекламных материалов в наземном городском транспорте +общего пользования, вагонах поездов метрополитена и ММТС + + Нанесение надписей, изображений, размещение информационных и рекламных +материалов на внешние и внутренние поверхности наземного городского транспорта +общего пользования, а также вагонов поездов метрополитена и ММТС, кроме +относящихся к порядку их эксплуатации и (или) наносимых (размещаемых) в рамках +исполнения государственного контракта или иного договора, заключенного с +уполномоченным органом исполнительной власти города Москвы, государственным +унитарным предприятием города Москвы или государственным учреждением города +Москвы, - + + влечет наложение административного штрафа на граждан в размере от трех +тысяч до пяти тысяч рублей; на должностных лиц - от тридцати пяти тысяч до +пятидесяти тысяч рублей; на юридических лиц - от пятидесяти тысяч до ста +пятидесяти тысяч рублей. + + (Статья дополнена - Закон Москвы от 08.07.2015 г. № 46) + + Статья 10.9. Нарушение правил пользования метрополитеном и ММТС + + 1. Абзац;(Утратил силу - Закон Москвы от 01.06.2011 г. № 21) + + абзац;(Утратил силу - Закон Москвы от 01.06.2011 г. 
№ 21) + + размещение багажа на сиденьях вагонов и скамейках станций; + + провоз огнестрельного оружия, колющих и легкобьющихся предметов без чехлов +(упаковки), в том числе лыж и коньков, мелкого садового инструмента с открытыми +острыми частями; + + провоз животных и птиц вне клеток и специальных контейнеров (сумок); + + провоз велосипедов, за исключением складных, и иных транспортных средств, +кроме детских и инвалидных колясок; + + провоз громоздкого багажа, сумма измерений которого по длине, ширине и +высоте превышает 150 сантиметров, длинномерных предметов, длина которых свыше +220 сантиметров; + + использование для транспортировки багажа колесных средств (за исключением +чемоданов, сумок-тележек, размеры которых вместе с кладью не превышают по сумме +измерений длины, ширины и высоты 150 сантиметров); + + использование пассажирами средств звукоусиления (кроме слуховых аппаратов) +и прослушивание аудиозаписей или аудиотрансляции без наушников; + + проезд в поездах, не осуществляющих или прекративших перевозку пассажиров; + + спуск пассажиров на пути движения поездов и ходовую балку ММТС; + + передвижение по территории станции и по подуличным переходам метрополитена +на мотоциклах, велосипедах, самокатах, роликовых коньках и иных транспортных и +спортивных средствах, кроме инвалидных колясок; + + нахождение на территории метрополитена и ММТС в пачкающей, зловонной +одежде, с багажом, предметами, продуктами (в том числе напитками и мороженым в +открытой таре), которые могут испачкать пассажиров, вагоны, сооружения и +устройства станций; + + умышленное создание ситуаций, мешающих движению пассажиропотока; + + осуществление кино-, видеосъемки без письменного разрешения руководства +метрополитена и ММТС; + + бег по эскалатору, сидение и установка вещей на ступенях и поручнях +эскалатора; + + пользование без надобности связью "пассажир - машинист", связью с +ситуационными центрами метрополитена и органов внутренних дел на метрополитене, +краном выключения дверей в 
вагонах, рукояткой остановки эскалатора; + + засорение и загрязнение сооружений, устройств пути, оборудования, +эскалаторов, подвижного состава, территории метрополитена и ММТС; + + перемещение по неработающему эскалатору без разрешения работников +метрополитена и ММТС - + + влечет предупреждение или наложение административного штрафа на граждан в +размере ста рублей. + + 2. Открывание дверей вагонов во время движения и остановок, а также +воспрепятствование их открытию и закрытию на остановках; + + провоз легковоспламеняющихся, взрывчатых, отравляющих, ядовитых веществ и +предметов, в том числе бытовых газовых баллонов; + + подкладывание на пути метрополитена или ходовую балку ММТС посторонних +предметов; + + самовольное проникновение в производственные помещения и на огражденную +территорию метрополитена и ММТС; + + подключение электрической нагрузки к сетям метрополитена без письменного +разрешения руководства метрополитена и ММТС - + + влечет наложение административного штрафа на граждан в размере от ста до +одной тысячи рублей; на должностных лиц - от одной тысячи до двух тысяч рублей; +на юридических лиц - от двух тысяч до четырех тысяч рублей. + + 2.1. Неоплаченный проход за пределы пропускных пунктов станций +метрополитена и ММТС, за исключением случаев, предусмотренных частью 2.2 +настоящей статьи, а также неоплаченный провоз каждого места багажа, сумма +измерений которого по длине, ширине и высоте находится в пределах от 121 до 150 +сантиметров, длинномерных предметов, длина которых от 151 до 220 сантиметров, - + + влечет наложение административного штрафа на граждан в размере одной тысячи +рублей. + + (Часть в редакции Закона Москвы от 31.10.2012 г. № 54) + + 2.2. Неправомерное использование льготной персонифицированной карты для +прохода за пределы пропускных пунктов станций метрополитена и ММТС - + + влечет наложение административного штрафа на граждан в размере двух тысяч +пятисот рублей. + + (Часть дополнена - Закон Москвы от 31.10.2012 г. 
№ 54) + + 3. Использование территории метрополитена и территории ММТС для занятий +предпринимательской и иной деятельностью без письменного разрешения руководства +метрополитена и ММТС; + + абзац; (Утратил силу - Закон Москвы от 08.07.2015 г. № 46) + + нанесение повреждения сооружениям, устройствам пути, оборудованию и +подвижному составу метрополитена и ММТС; + + реализация на территории метрополитена и ММТС транспортных карт и билетов +для проезда в метрополитене и ММТС, а также предоставление проходов по ним за +пределы пропускных пунктов станций метрополитена и ММТС лицами, не +уполномоченными на это руководством метрополитена и ММТС, - + + влечет наложение административного штрафа на граждан в размере от ста до +двух тысяч рублей; на должностных лиц - от двух тысяч до четырех тысяч рублей; +на юридических лиц - от пяти тысяч до десяти тысяч рублей. + + 4. Занятие попрошайничеством на территории метрополитена и ММТС, а также в +25-метровой зоне от наземных вестибюлей станций и сооружений метрополитена и +ММТС - + + влечет предупреждение или наложение административного штрафа на граждан в +размере от двух тысяч пятисот до пяти тысяч рублей. + + (Часть дополнена - Закон Москвы от 13.05.2015 г. № 25) + + Статья 10.10. Воспрепятствование выполнению служебных обязанностей +сотрудниками организаций городского и пригородного транспорта общего пользования + + 1. Воспрепятствование выполнению служебных обязанностей сотрудниками +организаций городского и пригородного транспорта общего пользования (водителями, +машинистами, контролерами, контролерами-ревизорами, кондукторами, дежурным +персоналом станций), а равно невыполнение их законных требований - + + влечет наложение административного штрафа на граждан в размере от ста до +трехсот рублей. + + 2. Те же действия, повлекшие создание препятствий для движения городского и +пригородного транспорта общего пользования, - + + влекут наложение административного штрафа на граждан в размере от пятисот +до одной тысячи рублей.
+ + Статья 10.11. Нарушение законодательства в области таксомоторных перевозок + + (Утратила силу - Закон Москвы от 12.12.2012 г. № 65) + + Глава 11. Административные правонарушения в области предпринимательской +деятельности + + Статья 11.1. Осуществление мелкорозничной торговли, оказание бытовых услуг +либо услуг общественного питания без документов, подтверждающих право размещения +объектов мелкорозничной сети + + Осуществление мелкорозничной торговли, оказание бытовых услуг либо услуг +общественного питания без документов, подтверждающих право размещения объектов +мелкорозничной сети, - + + влечет наложение административного штрафа на граждан в размере от одной +тысячи до двух тысяч пятисот рублей; на должностных лиц - от трех тысяч до пяти +тысяч рублей; на юридических лиц - от тридцати тысяч до пятидесяти тысяч рублей. + + Статья 11.2. Осуществление мелкорозничной торговли (оказание услуг) с +нарушением установленных требований к специализации объектов мелкорозничной сети + + Осуществление мелкорозничной торговли (оказание услуг) с нарушением +установленных требований к специализации объектов мелкорозничной сети - + + влечет наложение административного штрафа на должностных лиц в размере от +трех тысяч до четырех тысяч рублей; на юридических лиц - от десяти тысяч до +двадцати тысяч рублей. + + Статья 11.3. Продажа печатной продукции эротического характера с нарушением +установленных требований + + Осуществление розничной продажи продукции средств массовой информации, +специализирующихся на сообщениях и материалах эротического характера, в +неустановленных местах и с нарушением требований по реализации печатной +продукции эротического характера - + + влечет наложение административного штрафа на должностных лиц от десяти +тысяч до двадцати тысяч рублей; на юридических лиц - от тридцати тысяч до +пятидесяти тысяч рублей. + + Статья 11.4. 
Нарушение порядка применения установленных правовыми актами +города Москвы цен (тарифов), расценок, нормативов, коэффициентов, ставок +планово-нормативных расходов + + Нарушение должностным лицом органа исполнительной власти города Москвы, +государственного учреждения города Москвы, государственного унитарного +(казенного) предприятия города Москвы порядка применения установленных правовыми +актами города Москвы цен (тарифов), предельных, единичных и комплексных +расценок, территориальных сметных нормативов, коэффициентов, ставок +планово-нормативных расходов, а равно их завышение, за исключением случаев, +когда цены (тарифы) подлежат государственному регулированию в соответствии с +федеральным законодательством, - + + влечет наложение административного штрафа в размере пятидесяти тысяч +рублей. + + (Статья в редакции Закона Москвы от 24.06.2015 г. № 38) + + Статья 11.5. Нарушение требований к размещению объектов игорного бизнеса + + (Утратила силу - Закон Москвы от 14.12.2011 г. № 64) + + Статья 11.6. Нарушение правил размещения, регистрации и эксплуатации +аттракционной техники + + Нарушение правил размещения, регистрации и эксплуатации аттракционной +техники - + + влечет наложение административного штрафа на должностных лиц в размере от +одной тысячи до двух тысяч рублей; на юридических лиц - от двух тысяч до пяти +тысяч рублей. + + Статья 11.7. Нарушение порядка лицензирования розничной продажи алкогольной +продукции + + (Утратила силу - Закон Москвы от 23.11.2011 г. № 55) + + Статья 11.8. Изготовление, хранение и перевозки некачественных, опасных и +фальсифицированных пищевых продуктов + + (Утратила силу - Закон Москвы от 12.12.2012 г. № 65) + + Статья 11.9. Отсутствие производственного контроля при изготовлении и (или) +обороте пищевых продуктов + + (Утратила силу - Закон Москвы от 12.12.2012 г. № 65) + + Статья 11.10. 
Нарушение порядка доведения до сведения потребителей +информации о моторном топливе + + (Утратила силу - Закон Москвы от 11.12.2019 № 34) + + Статья 11.11. Самовольное размещение и (или) использование газобаллонных +установок на объектах потребительского рынка + + Самовольное размещение и (или) использование на объектах потребительского +рынка газобаллонных установок, то есть установок снабжения сжиженным газом, в +состав которых входят не более двух баллонов емкостью от 5 до 50 литров каждый, +регулятор давления, газопровод, запорное устройство и газопотребляющее +оборудование, - + + влечет наложение административного штрафа на граждан и должностных лиц в +размере от пятисот до двух тысяч рублей; на юридических лиц - от семи тысяч +пятисот до двадцати тысяч рублей. + + Статья 11.12. Нарушение нормативов по размещению, техническому обслуживанию +и безопасному использованию газобаллонных установок на объектах потребительского +рынка + + Нарушение установленных правовыми актами города Москвы нормативов, +определяющих технические условия на размещение и монтаж газобаллонных установок +на объектах потребительского рынка, требования регламента технического +обслуживания газобаллонных установок и требования по их безопасному +использованию на объектах потребительского рынка, - + + влечет наложение административного штрафа на граждан и должностных лиц в +размере от пятисот до одной тысячи рублей; на юридических лиц - от семи тысяч +пятисот до пятнадцати тысяч рублей. + + Статья 11.13. Осуществление торговой деятельности, оказание услуг вне +специально отведенных для этого мест + + 1. Осуществление торговой деятельности, оказание услуг вне специально +отведенных для этого мест - (В редакции Закона Москвы от 26.06.2013 г. № 35) + + влечет наложение административного штрафа на граждан в размере от двух +тысяч пятисот до пяти тысяч рублей; на должностных лиц - от пяти тысяч до десяти +тысяч рублей; на юридических лиц - от пятидесяти тысяч до двухсот тысяч рублей. 
+(В редакции законов Москвы от 07.10.2009 г. № 42; от 26.06.2013 г. № 35) + + 2. Те же действия, совершенные повторно, а также совершенные в вестибюлях +станций, подуличных переходах и других сооружениях метрополитена, на территории +ММТС, в 50-метровой зоне от наземных вестибюлей станций и сооружений +метрополитена и территории ММТС, на территориях объектов (зданий, строений, +сооружений) транспортно-пересадочных узлов и в 50-метровой зоне от указанных +объектов, - (В редакции Закона Москвы от 24.01.2018 г. № 3) + + влекут наложение административного штрафа на граждан в размере пяти тысяч +рублей; на должностных лиц - пятидесяти тысяч рублей; на юридических лиц - +пятисот тысяч рублей. + + (Часть дополнена - Закон Москвы от 26.06.2013 г. № 35) + + Статья 11.14. Нарушение порядка организации похоронного дела + + Нарушение установленного Правительством Москвы порядка организации +похоронного дела в городе Москве - + + влечет наложение административного штрафа на должностных лиц в размере от +пяти тысяч до десяти тысяч рублей; на юридических лиц - от сорока тысяч до +пятидесяти тысяч рублей. + + Статья 11.15. Нарушение правил работы кладбищ и крематориев и порядка их +содержания + + Нарушение установленных Правительством Москвы правил работы кладбищ и +крематориев и порядка их содержания - + + влечет наложение административного штрафа на граждан в размере от пятисот +до одной тысячи рублей; на должностных лиц - от двух тысяч до четырех тысяч +рублей; на юридических лиц - от десяти тысяч до пятнадцати тысяч рублей. + + Глава 12. Административные правонарушения в области бюджетных +правоотношений + + (Утратила силу - Закон Москвы от 12.12.2012 г. № 65) + + Глава 12.1. Административные правонарушения в сфере планирования размещения +государственного заказа и исполнения обязательств по государственным контрактам + + (Утратила силу - Закон Москвы от 24.06.2015 г. № 38) + + Глава 13. 
Административные правонарушения в области охраны и использования +объектов культурного наследия (памятников истории и культуры) регионального и +местного (муниципального) значения, их территорий и зон охраны + + Статья 13.1. Нарушение требований сохранения, охраны и использования +объектов культурного наследия (памятников истории и культуры) регионального и +местного (муниципального) значения, их территорий и зон охраны + + 1. Нарушение требований сохранения, охраны и использования объектов +культурного наследия (памятников истории и культуры) регионального и местного +(муниципального) значения, включенных в Городской реестр недвижимого культурного +наследия (Перечень объектов исторического и культурного наследия регионального и +местного (муниципального) значения), их территорий, а равно несоблюдение +ограничений, установленных в зонах их охраны, - + + влечет наложение административного штрафа на граждан в размере от двух +тысяч пятисот до пяти тысяч рублей; на должностных лиц - от двадцати пяти тысяч +до пятидесяти тысяч рублей; на юридических лиц - от пятисот тысяч до одного +миллиона рублей. + + 2. Действия (бездействие), предусмотренные частью 1 настоящей статьи, +совершенные на территориях историко-культурных заповедников +(музеев-заповедников) регионального и местного (муниципального) значения, а +равно в зонах их охраны, - + + влекут наложение административного штрафа на граждан в размере пяти тысяч +рублей; на должностных лиц - пятидесяти тысяч рублей; на юридических лиц - +одного миллиона рублей. + + Статья 13.2. 
Уклонение от оформления (переоформления), заключения +(перезаключения) охранного обязательства, договора аренды объекта культурного +наследия (памятника истории и культуры), договора безвозмездного пользования +объектом культурного наследия (охранного договора) + + Уклонение собственника, арендатора, пользователя объекта культурного +наследия (памятника истории и культуры) от оформления (переоформления), +заключения (перезаключения) в случаях, установленных законом, охранного +обязательства, договора аренды объекта культурного наследия (охранно-арендного +договора), договора безвозмездного пользования объектом культурного наследия +(охранного договора) в срок, установленный законодательством или предписанием +органа исполнительной власти города Москвы, уполномоченного в области +государственной охраны, сохранения, использования и популяризации объектов +культурного наследия (памятников истории и культуры) народов Российской +Федерации, - + + влечет наложение административного штрафа на граждан в размере пяти тысяч +рублей; на должностных лиц - пятидесяти тысяч рублей; на юридических лиц - +одного миллиона рублей. + + Статья 13.3. Неисполнение обязанностей по установке и содержанию +информационных надписей и обозначений на объектах культурного наследия +(памятниках истории и культуры) + + 1. Неисполнение собственниками объектов культурного наследия (памятников +истории и культуры) обязанностей по установке на указанных объектах +информационных надписей и обозначений - + + влечет предупреждение или наложение административного штрафа на граждан в +размере двух тысяч рублей; на должностных лиц - десяти тысяч рублей; на +юридических лиц - пятидесяти тысяч рублей. + + 2. 
Нарушение собственниками, арендаторами, пользователями объектов +культурного наследия (памятников истории и культуры) требований к содержанию в +надлежащем виде установленных на указанных объектах информационных надписей и +обозначений - + + влечет предупреждение или наложение административного штрафа на граждан в +размере от одной тысячи до двух тысяч рублей; на должностных лиц - от пяти тысяч +до десяти тысяч рублей; на юридических лиц - от двадцати тысяч до пятидесяти +тысяч рублей. + + Статья 13.4. Нарушение порядка обеспечения доступа граждан к объектам +культурного наследия (памятникам истории и культуры) + + Действия (бездействия), препятствующие доступу граждан к объекту +культурного наследия (памятнику истории и культуры) в соответствии с +законодательством, - + + влекут наложение административного штрафа на граждан в размере от одной +тысячи до двух тысяч пятисот рублей; на должностных лиц - от десяти тысяч до +двадцати пяти тысяч рублей; на юридических лиц - от двухсот тысяч до пятисот +тысяч рублей. + + Глава 14. Административные правонарушения, посягающие на + + институты государственной власти и местного самоуправления + + Статья 14.1. Невыполнение законных требований депутата Московской городской +Думы или депутата муниципального Собрания внутригородского муниципального +образования в городе Москве + + 1. Невыполнение должностным лицом органа государственной власти города +Москвы, иного государственного органа города Москвы, органа местного +самоуправления внутригородского муниципального образования в городе Москве, +организации или общественного объединения законных требований депутата +Московской городской Думы или депутата муниципального Собрания внутригородского +муниципального образования в городе Москве либо создание препятствий в +осуществлении его деятельности - + + влечет наложение административного штрафа на должностных лиц в размере от +двух тысяч до трех тысяч рублей. + + 2. 
Несоблюдение должностным лицом, указанным в части 1 настоящей статьи, +установленных сроков предоставления информации (документов, материалов, ответов +на обращения и на депутатские запросы) депутату Московской городской Думы или +депутату муниципального Собрания внутригородского муниципального образования в +городе Москве - + + влечет наложение административного штрафа на должностных лиц в размере от +одной тысячи до двух тысяч рублей. + + Статья 14.2. Воспрепятствование деятельности Уполномоченного по правам +ребенка в городе Москве + + Вмешательство в деятельность Уполномоченного по правам человека в городе +Москве с целью повлиять на его решение, неисполнение должностными лицами +обязанностей, установленных законом города Москвы, регулирующим вопросы +деятельности Уполномоченного по правам ребенка в городе Москве, а равно +воспрепятствование деятельности Уполномоченного по правам ребенка в городе +Москве в иной форме - (В редакции Закона Москвы от 07.10.2009 г. № 42) + + влечет наложение административного штрафа на граждан в размере от пятисот +до одной тысячи рублей; на должностных лиц - от четырех тысяч до пяти тысяч +рублей. + + Статья 14.3. Надругательство над гербом и (или) флагом города Москвы + + Надругательство над гербом и (или) флагом города Москвы - + + влечет наложение административного штрафа на граждан в размере от двух +тысяч пятисот до пяти тысяч рублей; на должностных лиц - от двадцати до тридцати +тысяч рублей. (В редакции Закона Москвы от 07.10.2009 г. № 42) + + Статья 14.4. Неправомерное использование герба и (или) флага города Москвы + + 1. 
Использование герба и (или) флага города Москвы без официального +разрешения в тех случаях, когда в соответствии с правовыми актами города Москвы +такое разрешение требуется, - + + влечет предупреждение или наложение административного штрафа на граждан в +размере от трех тысяч до пяти тысяч рублей; на должностных лиц - от пяти тысяч +до семи тысяч рублей; на юридических лиц - от десяти тысяч до пятнадцати тысяч +рублей. (В редакции Закона Москвы от 07.10.2009 г. № 42) + + 2. Неправомерное использование герба и (или) флага города Москвы на +бланках, печатях, штампах и вывесках - + + влечет предупреждение или наложение административного штрафа на граждан в +размере от двух тысяч пятисот до пяти тысяч рублей; на должностных лиц - от +пятнадцати тысяч до двадцати пяти тысяч рублей; на юридических лиц - от тридцати +тысяч до пятидесяти тысяч рублей. (В редакции Закона Москвы от 07.10.2009 г. № +42) + + Статья 14.5. Невыполнение обязанности по вывешиванию флага города Москвы + + Невыполнение обязанности по вывешиванию флага города Москвы в дни +государственных праздников Российской Федерации и праздников города Москвы, а +также в иных случаях, установленных правовыми актами города Москвы, - + + влечет предупреждение или наложение административного штрафа на граждан в +размере от одной тысячи до двух тысяч рублей; на должностных лиц - от двух тысяч +до трех тысяч рублей; на юридических лиц - от пяти тысяч до десяти тысяч рублей. +(В редакции Закона Москвы от 07.10.2009 г. № 42) + + Статья 14.6. Нарушение правил использования герба и (или) флага города +Москвы + + Нарушение правил использования герба и (или) флага города Москвы - + + влечет предупреждение или наложение административного штрафа в размере от +одной тысячи до трех тысяч рублей. (В редакции Закона Москвы от 07.10.2009 г. № +42) + + Статья 14.7. Незаконное использование словесных символов или +архитектурно-мемориальных символов города Москвы + + 1. 
Использование словесных символов или архитектурно-мемориальных символов +города Москвы без официального разрешения в тех случаях, когда в соответствии с +правовыми актами города Москвы такое разрешение требуется, - + + влечет предупреждение или наложение административного штрафа на граждан в +размере от одной тысячи до двух тысяч рублей; на должностных лиц - от двух тысяч +до трех тысяч рублей; на юридических лиц - от десяти до пятнадцати тысяч рублей. +(В редакции Закона Москвы от 07.10.2009 г. № 42) + + 2. Нарушение правил использования словесных символов или +архитектурно-мемориальных символов города Москвы - + + влечет предупреждение или наложение административного штрафа в размере от +пятисот до одной тысячи рублей. (В редакции Закона Москвы от 07.10.2009 г. № 42) + + Статья 14.8. Незаконные действия по отношению к гербовым эмблемам и (или) +флагам административных округов города Москвы + + Использование гербовых эмблем и (или) флагов административных округов +города Москвы с нарушением требований, установленных правовыми актами города +Москвы, - + + влечет предупреждение или наложение административного штрафа в размере от +одной тысячи до двух тысяч рублей. (В редакции Закона Москвы от 07.10.2009 г. № +42) + + Статья 14.9. Незаконные действия по отношению к гербам и (или) флагам +внутригородских муниципальных образований в городе Москве + + Использование гербов и (или) флагов внутригородских муниципальных +образований в городе Москве с нарушением требований, установленных правовыми +актами города Москвы и (или) правовыми актами органов местного самоуправления +внутригородских муниципальных образований в городе Москве, - + + влечет предупреждение или наложение административного штрафа в размере от +одной тысячи до двух тысяч рублей. (В редакции Закона Москвы от 07.10.2009 г. № +42) + + Статья 14.10. 
Незаконное изготовление и использование Знамени города +Москвы, должностных знаков Мэра Москвы и Председателя Московской городской Думы, +нагрудных знаков депутатов Московской городской Думы и членов Правительства +Москвы + + 1. Незаконное изготовление и использование Знамени города Москвы, +должностных знаков Мэра Москвы и Председателя Московской городской Думы, +нагрудных знаков депутатов Московской городской Думы и членов Правительства +Москвы - + + влечет наложение административного штрафа на граждан в размере от двух +тысяч до трех тысяч рублей; на должностных лиц - от трех тысяч до четырех тысяч +рублей; на юридических лиц - от четырех тысяч до пяти тысяч рублей. (В редакции +Закона Москвы от 07.10.2009 г. № 42) + + 2. Незаконное использование изображения Знамени города Москвы, изображений +должностных знаков Мэра Москвы и Председателя Московской городской Думы, +нагрудных знаков депутатов Московской городской Думы и членов Правительства +Москвы - + + влечет предупреждение или наложение административного штрафа в размере от +тысячи до трех тысяч рублей. (В редакции Закона Москвы от 07.10.2009 г. № 42) + + Статья 14.11. Незаконное изготовление, использование (ношение) наград, +нагрудных знаков к почетным званиям города Москвы и атрибутов к ним + + 1. Незаконное изготовление наград, нагрудных знаков к почетным званиям +города Москвы и атрибутов к ним, а равно учреждение и изготовление наград, +знаков почетных званий и атрибутов к ним, сходных до степени смешения с +наградами, знаками почетных званий города Москвы и атрибутами к ним, - + + влечет наложение административного штрафа на граждан в размере от одной +тысячи до двух тысяч рублей; на должностных лиц - от двух тысяч до трех тысяч +рублей; на юридических лиц - от трех тысяч до пяти тысяч рублей. (В редакции +Закона Москвы от 07.10.2009 г. № 42) + + 2. 
Незаконное использование (ношение) наград, нагрудных знаков к почетным +званиям города Москвы и атрибутов к ним - + + влечет наложение административного штрафа на граждан в размере от трех +тысяч до пяти тысяч рублей; на должностных лиц - от пяти тысяч до десяти тысяч +рублей; на юридических лиц - от десяти тысяч до пятнадцати тысяч рублей. (В +редакции Закона Москвы от 07.10.2009 г. № 42) + + 3. Незаконное использование изображений наград, нагрудных знаков к почетным +званиям города Москвы и атрибутов к ним - + + влечет предупреждение или наложение административного штрафа в размере от +одной тысячи до пяти тысяч рублей. (В редакции Закона Москвы от 07.10.2009 г. № +42) + + Статья 14.12. Незаконные действия по отношению к иным официальным знакам +отличия и различия в городе Москве + + Использование иных, помимо указанных в статьях 14.10 и 14.11 настоящего +Кодекса, официальных знаков отличия и различия, учрежденных в установленном +порядке государственными органами города Москвы или органами местного +самоуправления внутригородских муниципальных образований в городе Москве, с +нарушением требований, установленных правовыми актами города Москвы и (или) +правовыми актами органов местного самоуправления внутригородских муниципальных +образований в городе Москве, - + + влечет предупреждение или наложение административного штрафа в размере от +тысячи до двух тысяч рублей. (В редакции Закона Москвы от 07.10.2009 г. № 42) + + Глава 15. Административные правонарушения против порядка управления + + Статья 15.1. Неповиновение законному распоряжению должностного лица органа, +осуществляющего государственный надзор (контроль) + + (Утратила силу - Закон Москвы от 07.10.2009 г. № 42) + + Статья 15.2. Невыполнение в срок законного предписания (постановления, +представления, решения) органа (должностного лица), осуществляющего +государственный надзор (контроль) + + (Утратила силу - Закон Москвы от 07.10.2009 г. № 42) + + Статья 15.3. 
Непринятие мер по устранению причин и условий, +способствовавших совершению административного правонарушения + + (Утратила силу - Закон Москвы от 07.10.2009 г. № 42) + + Статья 15.4. Непредставление сведений (информации) + + (Утратила силу - Закон Москвы от 07.10.2009 г. № 42) + + Глава 16. Составление протоколов и рассмотрение дел об административных +правонарушениях + + Статья 16.1. Мировые судьи и органы, уполномоченные рассматривать дела об +административных правонарушениях + + Дела об административных правонарушениях, предусмотренных настоящим +Кодексом, рассматриваются в пределах компетенции, установленной статьей 16.3 +настоящего Кодекса: + + 1) мировыми судьями; + + 2) комиссиями по делам несовершеннолетних и защите их прав, создаваемыми в +соответствии с законом города Москвы; + + 3) органами исполнительной власти города Москвы, учреждениями органов +исполнительной власти города Москвы, уполномоченными на то исходя из задач и +функций, возложенных на них законами города Москвы либо нормативными правовыми +актами Мэра Москвы или Правительства Москвы; (В редакции Закона Москвы от +01.06.2011 г. № 21) + + 4) административными комиссиями, создаваемыми в соответствии с законом +города Москвы. + + Статья 16.2. Полномочия должностных лиц по рассмотрению дел об +административных правонарушениях + + Дела об административных правонарушениях, предусмотренных настоящим +Кодексом, рассматривают от имени органов, указанных в пункте 3 статьи 16.1 +настоящего Кодекса: + + 1) руководители соответствующих органов исполнительной власти города +Москвы, их заместители, руководители учреждений соответствующих органов +исполнительной власти города Москвы, их заместители; (В редакции Закона Москвы +от 01.06.2011 г. 
№ 21) + + 2) руководители структурных подразделений соответствующих органов +исполнительной власти города Москвы, их заместители, руководители структурных +подразделений учреждений соответствующих органов исполнительной власти города +Москвы, их заместители; (В редакции Закона Москвы от 01.06.2011 г. № 21) + + 3) (Утратил силу - Закон Москвы от 12.12.2012 г. № 65) + + Статья 16.3. Компетенция мировых судей и органов по рассмотрению дел об +административных правонарушениях + + Дела об административных правонарушениях, предусмотренных настоящим +Кодексом, рассматривают: + + 1) мировые судьи - дела об административных правонарушениях, +предусмотренных статьями 2.4, 2.5, 2.8, 3.1 - 3.3, 3.6, 3.8 - 3.11, 5.7, 8.13; +статьями 10.1, 10.5, частью 1 статьи 10.9 (в случае, если лицо, в отношении +которого возбуждено дело об административном правонарушении, оспаривает наличие +события правонарушения и (или) назначенное ему административное наказание либо +отказывается от уплаты административного штрафа на месте совершения +административного правонарушения); статьями 10.2, 10.8, частями 2 и 3 статьи +10.9, статьями 10.10 и 11.3, главой 14 (за исключением статьи 14.5) настоящего +Кодекса; (В редакции законов Москвы от 12.12.2012 г. № 65; от 09.09.2015 г. № +49; от 23.11.2016 г. № 39) + + 2) районные комиссии по делам несовершеннолетних и защите их прав - дела об +административных правонарушениях, совершенных несовершеннолетними, а также дела +об административных правонарушениях, предусмотренных статьей 2.10.1 настоящего +Кодекса; (В редакции Закона Москвы от 30.01.2019 № 4) + + 3) (Утратил силу - Закон Москвы от 12.12.2012 г. 
№ 65) + + 4) уполномоченный орган исполнительной власти города Москвы, осуществляющий +региональный государственный контроль за соблюдением требований в сфере +благоустройства в городе Москве, систематическое наблюдение (мониторинг) за +состоянием объектов благоустройства в городе Москве с применением мер +административного воздействия при выявлении нарушений, контроль за соблюдением +подрядными организациями города Москвы правил и технологии производства работ по +государственным контрактам в сфере благоустройства городских территорий +(ведомственный контроль), региональный государственный надзор за обеспечением +сохранности автомобильных дорог регионального и межмуниципального значения и +региональный государственный надзор в области технического состояния самоходных +машин и других видов техники, а также аттракционной техники, - дела об +административных правонарушениях, предусмотренных статьей 3.13 (в отношении +юридических лиц); частями 1-3 статьи 3.18.1; частью 1 статьи 3.20 (в отношении +юридических лиц), статьями 7.8-7.25, 8.2-8.8, 8.10; статьей 8.11 (в отношении +юридических лиц); частями 1 и 2 статьи 8.12; частями 1-4 статьи 8.13 (в +отношении юридических лиц); статьями 8.15, 8.16, 8.18-8.23, 8.26, 11.6, 14.5 +настоящего Кодекса. От имени указанного органа рассматривать дела об +административных правонарушениях, предусмотренных частью 2 статьи 3.18.1; частью +3 статьи 3.18.1 (в отношении граждан), вправе инспекторы; (В редакции законов +Москвы от 13.03.2013 № 11, от 26.06.2013 № 35, от 22.01.2014 № 2, от 13.05.2015 +№ 26, от 24.06.2015 № 32, от 08.07.2015 № 46, от 24.01.2018 № 3, от 22.05.2019 № +20, от 01.04.2020 № 6) + + 5) (Утратил силу - Закон Москвы от 12.12.2012 г. 
№ 65) + + 6) орган исполнительной власти города Москвы, осуществляющий +государственное управление в области охраны окружающей среды в городе Москве, - +дела об административных правонарушениях, предусмотренных статьей 3.20, главой 4 +(за исключением части 1 статьи 4.50), частью 6 статьи 5.1, статьями 8.15-8.17 +настоящего Кодекса; (В редакции законов Москвы от 13.03.2013 г. № 11; от +22.01.2014 г. № 2; от 17.12.2014 г. № 61; от 13.05.2015 г. № 26; от 23.03.2016 +г. № 13; от 20.09.2017 г. № 35) + + 7) (Утратил силу - Закон Москвы от 07.10.2009 г. № 42) + + 8) (Утратил силу - Закон Москвы от 03.04.2013 г. № 13) + + 9) уполномоченный орган исполнительной власти города Москвы в области +транспорта, учреждения уполномоченного органа исполнительной власти города +Москвы в области транспорта - дела об административных правонарушениях, +предусмотренных частью 4 статьи 3.18.1, статьей 5.3, частью 5 статьи 8.13, +статьями 8.14, 10.1, 10.2, 10.5, 10.8, 10.8.1, 10.9; частью 2 статьи 11.13 (в +случае, если административное правонарушение совершено в вестибюлях станций, +подуличных переходах и других сооружениях метрополитена, на территории ММТС, в +50-метровой зоне от наземных вестибюлей станций и сооружений метрополитена и +территории ММТС, на территориях объектов (зданий, строений, сооружений) +транспортно-пересадочных узлов и в 50-метровой зоне от указанных объектов) +настоящего Кодекса. 
От имени указанных органа, учреждений рассматривать дела об +административных правонарушениях, предусмотренных статьями 8.14, 10.1, 10.5, +частями 2.1 и 2.2 статьи 10.9 настоящего Кодекса, вправе контролеры, +контролеры-ревизоры; (В редакции законов Москвы от 12.12.2012 № 65, от +16.01.2013 № 1, от 27.03.2013 № 12, от 26.06.2013 № 36, от 08.07.2015 № 43, от +08.07.2015 № 46, от 09.09.2015 № 49, от 23.11.2016 № 39, от 24.01.2018 № 3, от +11.12.2019 № 34, от 01.04.2020 № 6) + + 10) уполномоченный орган исполнительной власти города Москвы по контролю за +использованием нежилого фонда и земель - дела об административных +правонарушениях, предусмотренных главой 6, статьями 9.4, 9.6-9.8, 9.10 +настоящего Кодекса; (В редакции законов Москвы от 12.12.2012 г. № 65; от +21.01.2015 г. № 1; от 24.06.2015 г. № 32) + + 11) уполномоченный орган исполнительной власти города Москвы в области +энергосбережения - дела об административных правонарушениях, предусмотренных +статьями 7.27-7.29 настоящего Кодекса; (В редакции Закона Москвы от 12.12.2012 +г. № 65) + + 12) уполномоченный орган исполнительной власти города Москвы по +региональному государственному жилищному надзору - дела об административных +правонарушениях, предусмотренных статьями 9.1 и 9.12 настоящего Кодекса; (В +редакции Закона Москвы от 26.06.2013 г. № 36) + + 13) уполномоченный орган исполнительной власти города Москвы по контролю за +выполнением требований организации технической эксплуатации, ремонта и +мониторинга особых объектов - дела об административных правонарушениях, +предусмотренных частью 3 статьи 8.12, статьей 9.11 настоящего Кодекса; (В +редакции законов Москвы от 10.12.2008 г. № 64; от 12.12.2012 г. 
№ 65) + + 14) уполномоченный орган исполнительной власти города Москвы, +осуществляющий функции по контролю и надзору в финансово-бюджетной сфере, - дела +об административных правонарушениях, предусмотренных частями 1-3 статьи 3.18.1, +статьей 11.4 настоящего Кодекса; (В редакции законов Москвы от 12.12.2012 № 65, +от 24.06.2015 № 38, от 01.04.2020 № 6) + + 15) (Утратил силу - Закон Москвы от 12.12.2012 г. № 65) + + 16) уполномоченный орган исполнительной власти города Москвы в области +государственной охраны, сохранения, использования и популяризации объектов +культурного наследия (памятников истории и культуры) народов Российской +Федерации - дела об административных правонарушениях, предусмотренных главой 13 +настоящего Кодекса; (В редакции Закона Москвы от 12.12.2012 г. № 65) + + 17) Административная комиссия при Правительстве Москвы по делам об +административных правонарушениях - дела об административных правонарушениях, +предусмотренных статьями 3.15, 3.16, 3.18, 3.19, 7.8 (в отношении граждан и +должностных лиц); статьями 7.11-7.25 (в отношении граждан и должностных лиц, а +также дела об административных правонарушениях, совершенных в отношении защитных +сооружений гражданской обороны и артезианских скважин); статьями 7.30, 8.1, +частью 1 статьи 8.2, статьями 8.2.1, 8.3.1 (в отношении должностных лиц), +статьями 8.10, 8.16, 8.18-8.22 (в отношении граждан и должностных лиц); статьями +11.14, 11.15 настоящего Кодекса; (В редакции законов Москвы от 13.03.2013 г. № +11; от 09.09.2015 г. № 49; от 24.01.2018 г. 
№ 3) + + 18) административные комиссии префектур административных округов города +Москвы по делам об административных правонарушениях - дела об административных +правонарушениях, предусмотренных статьями 2.2, 2.9, 3.18, 3.19, 7.2, 7.3; +статьями 7.9-7.11 (в отношении должностных лиц); статьями 7.12-7.25 (в отношении +граждан и должностных лиц, а также дела об административных правонарушениях, +совершенных в отношении защитных сооружений гражданской обороны); частью 2 +статьи 8.2, статьями 8.4, 8.10, 8.11 (в отношении должностных лиц); статьями +8.3, 8.5 - 8.8 (в отношении должностных лиц); статьями 8.23, 11.1, 11.2 +настоящего Кодекса; (В редакции законов Москвы от 07.10.2009 № 42, от +23.06.2010 № 28, от 12.12.2012 № 65, от 13.03.2013 № 11, от 22.05.2019 № 20) + + 18.1) административные комиссии префектур административных округов города +Москвы по делам об административных правонарушениях, если данные +административные округа города Москвы в соответствии с правовыми актами города +Москвы включают в себя поселения, - дела об административных правонарушениях, +предусмотренных статьями 2.10, 2.10.1, 2.11, 3.5, 3.12, 3.13, 3.18, 3.19; +частями 1-5, 7-9 статьи 5.1, статьями 5.3-5.6, 5.8; статьями 7.9-7.11 (в +отношении граждан и должностных лиц); статьями 7.11-7.25 (в отношении граждан и +должностных лиц, а также дела об административных правонарушениях, совершенных в +отношении защитных сооружений гражданской обороны и артезианских скважин); +статьей 7.30; частью 2 статьи 8.2, статьями 8.4, 8.10, 8.11 (в отношении +должностных лиц); статьями 8.3, 8.5-8.8 (в отношении граждан и должностных лиц); +частями 1-4 статьи 8.13 (в отношении должностных лиц); статьями 11.11-11.13 +настоящего Кодекса; (В редакции законов Москвы от 20.11.2013 № 65, от +13.05.2015 № 26, от 08.07.2015 № 46, от 30.01.2019 № 4, от 22.05.2019 № 20) + + 19) административные комиссии управ районов города Москвы по делам об +административных правонарушениях - дела об административных правонарушениях, 
+предусмотренных статьями 2.10, 2.10.1, 2.11, 3.5, 3.12, 3.13, 3.18, 3.19; +частями 1 - 5, 7 - 9 статьи 5.1, статьями 5.3 - 5.6, 5.8; статьями 7.9 - 7.11 (в +отношении граждан и должностных лиц); статьями 7.11 - 7.25 (в отношении граждан +и должностных лиц, а также дела об административных правонарушениях, совершенных +в отношении защитных сооружений гражданской обороны и артезианских скважин); +статьей 7.30; частью 2 статьи 8.2, статьями 8.4, 8.10, 8.11 (в отношении +должностных лиц); статьями 8.3, 8.5 - 8.8 (в отношении граждан и должностных +лиц); частями 1-4 статьи 8.13 (в отношении должностных лиц); статьями 11.11, +11.12 настоящего Кодекса, за исключением случаев, установленных пунктом 18.1 +настоящей статьи; (В редакции законов Москвы от 12.12.2012 № 65, от 26.06.2013 +№ 35, от 20.11.2013 № 65, от 13.05.2015 № 26, от 08.07.2015 № 46, от 30.01.2019 +№ 4, от 22.05.2019 № 20) + + 20) уполномоченный орган исполнительной власти города Москвы в области +труда и занятости населения - дела об административных правонарушениях, +предусмотренных статьей 2.2 настоящего Кодекса; (Дополнен - Закон Москвы от +07.10.2009 г. № 42) + + 21) служба охраны особо охраняемых природных территорий - дела об +административных правонарушениях, предусмотренных частью 2 статьи 3.20, статьей +4.2, частью 1 статьи 4.21, статьями 4.29, частью 4 статьи 5.1 настоящего +Кодекса; (В редакции законов Москвы от 07.10.2009 г. № 42; от 12.12.2012 г. № +65; от 22.01.2014 г. № 2) + + 22) (Утратил силу - Закон Москвы от 24.06.2015 г. № 38) + + 23) должностные лица уполномоченных территориальных органов исполнительной +власти города Москвы - дела об административных правонарушениях, предусмотренных +статьей 11.13 настоящего Кодекса; (Дополнен - Закон Москвы от 26.06.2013 г. № +35) + + 24) должностные лица уполномоченного органа исполнительной власти города +Москвы в области потребительского рынка и услуг - дела об административных +правонарушениях, предусмотренных статьей 11.13 настоящего Кодекса. 
(Дополнен - +Закон Москвы от 26.06.2013 г. № 35) + + 25) уполномоченный орган исполнительной власти города Москвы, +осуществляющий контроль за предоставлением государственных услуг города Москвы, +- дела об административных правонарушениях, предусмотренных статьей 2.12 +настоящего Кодекса. (Дополнен - Закон Москвы от 04.06.2014 г. № 30) + + 26) орган исполнительной власти города Москвы, осуществляющий привлечение +лиц, совершивших административные правонарушения на транспорте, в области +дорожного движения, к административной ответственности, - дела об +административных правонарушениях, предусмотренных частью 4 статьи 3.18.1, частью +1 статьи 4.50, статьей 8.25 настоящего Кодекса. (В редакции законов Москвы от +17.12.2014 № 61, от 23.03.2016 № 13, от 01.04.2020 № 6) + + Статья 16.4. Производство по делам об административных правонарушениях и +исполнение постановлений о назначении административных наказаний + + Производство по делам об административных правонарушениях, предусмотренных +настоящим Кодексом, и исполнение постановлений о назначении административных +наказаний осуществляются в соответствии с Кодексом Российской Федерации об +административных правонарушениях. + + Статья 16.5. Должностные лица, уполномоченные составлять протоколы об +административных правонарушениях + + 1. Протоколы об административных правонарушениях, предусмотренных настоящим +Кодексом, составляются должностными лицами органов, уполномоченных рассматривать +дела об административных правонарушениях в соответствии с пунктами 1-16, 21, +23-26 статьи 16.3 настоящего Кодекса. (В редакции законов Москвы от 07.10.2009 +г. № 42; от 10.03.2010 г. № 6; от 13.10.2010 г. № 40; от 26.06.2013 г. № 35; от +04.06.2014 г. № 30; от 17.12.2014 г. № 61; от 24.06.2015 г. 
№ 38) + + 1.1) должностные лица органов внутренних дел (полиции, в том числе полиции +на транспорте) - об административных правонарушениях, предусмотренных статьей +3.18.1 настоящего Кодекса; (Дополнен - Закон Москвы от 01.04.2020 № 6) + + 2. Орган исполнительной власти города Москвы, осуществляющий +государственное управление в области охраны окружающей среды в городе Москве, +вправе составлять протоколы об административных правонарушениях в соответствии с +частью 1 настоящей статьи, за исключением административных правонарушений, +предусмотренных частью 1 статьи 4.9, статьями 4.13 и 4.37 настоящего Кодекса. + + 3. Помимо случаев, предусмотренных частями 1 и 2 настоящей статьи, +протоколы об административных правонарушениях, предусмотренных настоящим +Кодексом, вправе составлять: + + 1) должностные лица органов внутренних дел - об административных +правонарушениях, предусмотренных статьями 3.1 - 3.3, 3.5; частью 1 статьи 3.6 (в +отношении граждан); статьями 3.8 - 3.13, 3.20, частью 1 статьи 4.9, статьями +4.13, 4.37, главой 5, статьями 7.3, 7.9; статьями 7.12 - 7.18 (в отношении +граждан); статьями 7.22, 7.23, 7.25 (в отношении граждан); статьями 8.3, 8.8, +8.15, 8.17; статьями 10.1, 10.5 (в случае, если лицо, в отношении которого +возбуждено дело об административном правонарушении, оспаривает наличие события +правонарушения и (или) назначенное ему административное наказание либо +отказывается от уплаты административного штрафа на месте совершения +административного правонарушения); статьями 10.2, 10.8-10.10, 11.1 - 11.3, +11.13, 11.15, главой 14 настоящего Кодекса; (В редакции законов Москвы от +12.12.2012 г. № 65; от 13.03.2013 г. № 11; от 27.03.2013 г. № 12; от 26.06.2013 +г. № 36; от 22.01.2014 г. № 2; от 08.07.2015 г. № 43; от 23.11.2016 г. № 39; от +24.01.2018 г. 
№ 3) + + 2) члены районных комиссий по делам несовершеннолетних и защите их прав - +об административных правонарушениях, предусмотренных статьями 2.5, 2.8, 2.9, +2.10.1, 3.11, 3.12 настоящего Кодекса; (В редакции законов Москвы от 12.12.2012 +№ 65, от 30.01.2019 № 4) + + 3) должностные лица территориальных органов исполнительной власти города +Москвы и уполномоченного органа исполнительной власти города Москвы в области +содействия занятости населения - об административных правонарушениях, +предусмотренных статьей 2.2 настоящего Кодекса; (В редакции Закона Москвы от +07.10.2009 г. № 42) + + 4) должностные лица уполномоченного органа исполнительной власти города +Москвы в области градостроительной и архитектурной деятельности - об +административных правонарушениях, предусмотренных статьями 7.1, 7.2, 8.1 +настоящего Кодекса; (В редакции Закона Москвы от 12.12.2012 г. № 65) + + 5) должностные лица уполномоченного органа исполнительной власти города +Москвы в области молодежной и семейной политики - об административных +правонарушениях, предусмотренных статьей 2.4 настоящего Кодекса; + + 6) должностные лица уполномоченного органа исполнительной власти города +Москвы в области охраны здоровья населения - об административных +правонарушениях, предусмотренных статьей 2.10 настоящего Кодекса; + + 7) специалисты по фауне префектур административных округов и управ районов +города Москвы, должностные лица органов государственного ветеринарного надзора, +общественные инспектора по защите животных - об административных +правонарушениях, предусмотренных статьей 2.11, главой 5 настоящего Кодекса; + + 8) должностные лица органа исполнительной власти города Москвы, +уполномоченного на осуществление регионального государственного надзора в +области защиты населения и территорий от чрезвычайных ситуаций, - об +административных правонарушениях, предусмотренных частью 1 статьи 3.6 (в +отношении должностных и юридических лиц); частью 2 статьи 3.6, статьями 3.15, +3.16, 3.18, 3.19; 
статьями 7.11 - 7.23, 7.25, 8.19 (протоколы об +административных правонарушениях, совершенных в отношении защитных сооружений +гражданской обороны и артезианских скважин), статьей 7.30 настоящего Кодекса; +(В редакции законов Москвы от 07.10.2009 г. № 42; от 23.06.2010 г. № 28; от +13.03.2013 г. № 11; от 27.05.2015 г. № 28; от 09.09.2015 г. № 49; от 23.11.2016 +г. № 39) + + 9) должностные лица уполномоченного органа исполнительной власти города +Москвы в области потребительского рынка и услуг - об административных +правонарушениях, предусмотренных статьями 11.1, 11.2 настоящего Кодекса; (В +редакции законов Москвы от 23.11.2011 г. № 55; от 26.06.2013 г. № 35) + + 10) (Утратил силу - Закон Москвы от 07.10.2009 г. № 42) + + 11) (Утратил силу - Закон Москвы от 07.10.2009 г. № 42) + + 12) (Утратил силу - Закон Москвы от 26.10.2011 г. № 47) + + 13) (Утратил силу - Закон Москвы от 26.10.2011 г. № 47) + + 14) должностные лица уполномоченного органа исполнительной власти города +Москвы, осуществляющего региональный государственный контроль за соблюдением +требований в сфере благоустройства в городе Москве, систематическое наблюдение +(мониторинг) за состоянием объектов благоустройства в городе Москве с +применением мер административного воздействия при выявлении нарушений, контроль +за соблюдением подрядными организациями города Москвы правил и технологии +производства работ по государственным контрактам в сфере благоустройства +городских территорий (ведомственный контроль), региональный государственный +надзор за обеспечением сохранности автомобильных дорог регионального и +межмуниципального значения и региональный государственный надзор в области +технического состояния самоходных машин и других видов техники, а также +аттракционной техники, - об административных правонарушениях, предусмотренных +статьей 7.3, частью 5 статьи 8.13; частями 1 и 2 статьи 8.12, частями 1-4 статьи +8.13 (в отношении граждан и должностных лиц); статьей 8.11 (в отношении +должностных лиц) 
настоящего Кодекса; (В редакции законов Москвы от 13.03.2013 +г. № 11; от 13.05.2015 г. № 26; от 08.07.2015 г. № 46; от 23.03.2016 г. № 13) + + 15) должностные лица уполномоченных органов исполнительной власти города +Москвы - об административных правонарушениях, предусмотренных статьями 11.1, +11.2, 11.14, 11.15 настоящего Кодекса; + + 16) должностные лица уполномоченного Правительством Москвы органа +государственного контроля за использованием газобаллонных установок на объектах +потребительского рынка - об административных правонарушениях, предусмотренных +статьями 11.11, 11.12 настоящего Кодекса; + + 17) (Утратил силу - Закон Москвы от 12.12.2012 г. № 65) + + 18) (Утратил силу - Закон Москвы от 12.12.2012 г. № 65) + + 4. Протоколы об административных правонарушениях, предусмотренных статьей +14.5 настоящего Кодекса, составляются указанными в частях 1 и 2 настоящей статьи +должностными лицами органов в соответствии с компетенцией этих органов по +рассмотрению дел об административных правонарушениях, установленной статьей 16.3 +настоящего Кодекса. (В редакции законов Москвы от 07.10.2009 г. № 42; от +12.12.2012 г. № 65) + + 5. Перечни должностных лиц, уполномоченных составлять протоколы об +административных правонарушениях в соответствии с частями 1, 2 и 3 настоящей +статьи, устанавливаются соответствующими органами исполнительной власти города +Москвы, учреждениями соответствующих органов исполнительной власти города +Москвы. (В редакции Закона Москвы от 01.06.2011 г. № 21) + + 6. 
Протоколы об административных правонарушениях, предусмотренных частью 1 +статьи 19.4, частью 1 статьи 19.4-1, частью 1 статьи 19.5, статьей 19.7 Кодекса +Российской Федерации об административных правонарушениях, вправе составлять +следующие должностные лица органов исполнительной власти города Москвы, +уполномоченных на осуществление регионального государственного контроля +(надзора) и (или) переданных полномочий в области федерального государственного +надзора: + + 1) руководители соответствующих органов исполнительной власти города +Москвы, их заместители; + + 2) руководители структурных подразделений соответствующих органов +исполнительной власти города Москвы, их заместители; + + 3) иные должностные лица соответствующих органов исполнительной власти +города Москвы, в должностные обязанности которых входит осуществление +регионального государственного контроля (надзора) и (или) переданных полномочий +в области федерального государственного надзора. + + (Часть дополнена - Закон Москвы от 24.06.2015 г. № 39) + + Статья 16.6. Особенности рассмотрения дел об отдельных административных +правонарушениях + + 1. Дела об административных правонарушениях, предусмотренных статьями 8.3, +8.5, частями 2 и 3 статьи 8.8, частью 1 статьи 8.10, частью 2 статьи 8.14, +статьей 8.15, частью 3 статьи 8.16, статьями 8.17-8.19, частями 1 и 2 статьи +8.22, статьей 8.25 настоящего Кодекса, совершенных с использованием транспортных +средств, в случае фиксации этих административных правонарушений работающими в +автоматическом режиме специальными техническими средствами, имеющими функции +фото- и киносъемки, видеозаписи, или средствами фото- и киносъемки, видеозаписи +рассматриваются в порядке, установленном частью 3 статьи 28.6 Кодекса Российской +Федерации об административных правонарушениях. (В редакции законов Москвы от +13.03.2013 г. № 11; от 27.03.2013 г. № 12; от 26.06.2013 г. № 36; от 18.06.2014 +г. № 31; от 13.05.2015 г. № 26) + + 1.1. 
Дела об административных правонарушениях, предусмотренных статьей +3.18.1 настоящего Кодекса, в случае фиксации этих административных +правонарушений посредством технологий электронного мониторинга местоположения +гражданина в определенной геолокации с использованием системы городского +видеонаблюдения, технических устройств и программного обеспечения, +рассматриваются в порядке, установленном частью 3 статьи 28.6 Кодекса Российской +Федерации об административных правонарушениях. (Дополнен - Закон Москвы от +01.04.2020 № 6) + + 2. Дела об административных правонарушениях, предусмотренных статьями +8.2-8.4, 8.6, частью 1 статьи 8.7, статьями 8.10 и 8.12, частью 3 статьи 8.13, +статьями 8.15-8.18, 8.20, 8.21, 8.23, 8.26 настоящего Кодекса, выразившихся в +нарушении установленных нормативными правовыми актами города Москвы норм и +правил в области благоустройства территории собственником или иным владельцем +земельного участка либо другого объекта недвижимости (гражданином или +организацией независимо от организационно-правовой формы и формы собственности, +на которых в соответствии с правовыми актами и (или) правоустанавливающими +документами возложены обязанности (бремя) собственника по содержанию территории, +зданий, строений, сооружений или иных объектов недвижимости), в случае фиксации +этих административных правонарушений работающими в автоматическом режиме +специальными техническими средствами, имеющими функции фото- и киносъемки, +видеозаписи, или средствами фото- и киносъемки, видеозаписи рассматриваются в +порядке, установленном частью 3 статьи 28.6 Кодекса Российской Федерации об +административных правонарушениях. (В редакции законов Москвы от 13.03.2013 г. № +11; от 24.06.2015 г. № 32) + + 3. 
В случае если обязанности собственника или иного владельца земельного +участка или иного объекта недвижимости возложены на орган государственной +власти, орган местного самоуправления, государственное или муниципальное +учреждение, к административной ответственности в соответствии с частью 2 +настоящей статьи привлекается должностное лицо указанного органа, учреждения. (В +редакции законов Москвы от 13.03.2013 г. № 11; от 27.01.2016 г. № 3) + + 4. В целях пресечения административного правонарушения, предусмотренного +частью 4 статьи 3.18.1 настоящего Кодекса, применяется задержание транспортного +средства в порядке, установленном частями 4-8, 10-12 статьи 27.13 Кодекса +Российской Федерации об административных правонарушениях. В этом случае решение +о задержании транспортного средства, прекращении указанного задержания или +возврате транспортного средства принимается должностными лицами, уполномоченными +составлять протоколы об административных правонарушениях, указанных в части 1 +статьи 27.13 Кодекса Российской Федерации об административных правонарушениях. +Уполномоченный орган исполнительной власти города Москвы в области транспорта, +его учреждение, орган исполнительной власти города Москвы, осуществляющий +привлечение лиц, совершивших административные правонарушения на транспорте, в +области дорожного движения, к административной ответственности, направляют +указанным должностным лицам информацию, необходимую для принятия решения о +задержании транспортного средства. 
+ + Перемещение транспортных средств на специализированную стоянку, их хранение +и возврат владельцам, представителям владельцев или лицам, имеющим при себе +документы, необходимые для управления данными транспортными средствами, +осуществляются учреждением уполномоченного органа исполнительной власти города +Москвы в области транспорта в порядке, установленном Законом города Москвы от 11 +июля 2012 года № 42 "О порядке перемещения транспортных средств на +специализированную стоянку, их хранения, оплаты стоимости перемещения и +хранения, возврата транспортных средств в городе Москве" (с учетом особенностей, +предусмотренных настоящей частью). + + (Часть дополнена - Закон Москвы от 01.04.2020 № 6) + + (Статья дополнена - Закон Москвы от 24.10.2012 г. № 51) + + Глава 17. Заключительные и переходные положения + + Статья 17.1. Вступление настоящего Кодекса в силу + + Настоящий Кодекс вступает в силу с 1 января 2008 года. + + Статья 17.2. О признании утратившими силу отдельных законов города Москвы в +связи с вступлением в силу настоящего Кодекса + + Признать утратившими силу с 1 января 2008 года: + + 1) Закон города Москвы от 7 сентября 1994 года № 17-69 "Об установлении +ответственности за нарушение Правил пользования Московским метрополитеном"; + + 2) Закон города Москвы от 24 января 1996 года № 1 "Об административной +ответственности за правонарушения в сфере благоустройства города"; + + 3) Закон города Москвы от 10 июня 1996 года № 18 "Об изменении и дополнении +Закона города Москвы от 24 января 1996 года № 1 "О штрафных санкциях за +правонарушения в сфере благоустройства города"; + + 4) Закон города Москвы от 18 июня 1997 года № 20 "О внесении изменений в +статью 1 Закона города Москвы от 7 сентября 1994 года № 17-69 "Об установлении +ответственности за нарушение Правил пользования Московским метрополитеном"; + + 5) Закон города Москвы от 9 июля 1997 года № 29 "О внесении изменений и +дополнений в Закон города Москвы от 20 декабря 1995 года № 26 "Об 
+ответственности за нарушение нормативов и жилищных стандартов по использованию, +сохранности и эксплуатации жилищного фонда города Москвы"; + + 6) Закон города Москвы от 17 декабря 1997 года № 56 "О внесении изменений и +дополнений в Закон города Москвы от 24 января 1996 года № 1 "О штрафных санкциях +за правонарушения в сфере благоустройства города"; + + 7) Закон города Москвы от 8 июля 1999 года № 29 "Об административной +ответственности за попустительство незаконному обороту или незаконному +потреблению наркотических средств или психотропных веществ"; + + 8) Закон города Москвы от 14 июля 1999 года № 33 "Об ответственности за +производство и оборот нефтепродуктов, не соответствующих экологическим +требованиям"; + + 9) Закон города Москвы от 15 марта 2000 года № 7 "О внесении изменений и +дополнений в Закон города Москвы от 5 ноября 1997 года № 46 "О защите населения +и территорий города от чрезвычайных ситуаций природного и техногенного +характера"; + + 10) Закон города Москвы от 26 июня 2002 года № 35 "О внесении изменений в +Закон города Москвы от 20 декабря 1995 года № 26 "Об ответственности за +нарушение нормативов и жилищных стандартов по использованию, сохранности и +эксплуатации жилищного фонда города Москвы"; + + 11) Закон города Москвы от 30 октября 2002 года № 53 "Об административной +ответственности за нарушение Правил пользования наземным городским пассажирским +транспортом общего пользования (трамваями, троллейбусами, автобусами) в городе +Москве"; + + 12) Закон города Москвы от 29 января 2003 года № 4 "О внесении изменений в +статьи 2, 3 Закона города Москвы от 12 июля 2002 года № 42 "Об административной +ответственности за нарушение покоя граждан и тишины в ночное время в городе +Москве"; + + 13) Закон города Москвы от 26 февраля 2003 года № 14 "О внесении изменений +в Закон города Москвы от 24 января 1996 года № 1 "О штрафных санкциях за +правонарушения в сфере благоустройства города"; + + 14) Закон города Москвы от 2 июля 2003 года № 46 "Об 
административной +ответственности за нарушение нормативов города Москвы по использованию +газобаллонных установок на объектах потребительского рынка города Москвы"; + + 15) Закон города Москвы от 16 июля 2003 года № 51 "Об административной +ответственности за правонарушения в сфере потребительского рынка и услуг города +Москвы"; + + 16) Закон города Москвы от 19 мая 2004 года № 34 "Об административной +ответственности за попустительство нахождению несовершеннолетних в общественных +местах в ночное время без сопровождения своих законных представителей"; + + 17) Закон города Москвы от 16 июня 2004 года № 38 "О внесении изменения в +статью 17 Закона города Москвы от 24 января 1996 года № 1 "Об административной +ответственности за правонарушения в сфере благоустройства города"; + + 18) Закон города Москвы от 14 июля 2004 года № 49 "Об административной +ответственности за правонарушения в области содержания и сохранности подземных +инженерных коммуникаций и сооружений"; + + 19) Закон города Москвы от 8 сентября 2004 года № 52 "О внесении изменений +и дополнения в Закон города Москвы от 8 июля 1999 года № 29 "Об административной +ответственности за попустительство незаконному обороту или незаконному +потреблению наркотических средств или психотропных веществ"; + + 20) Закон города Москвы от 2 февраля 2005 года № 5 "О внесении изменений в +Закон города Москвы от 16 июля 2003 года № 51 "Об административной +ответственности за правонарушения в сфере потребительского рынка и услуг города +Москвы"; + + 21) Закон города Москвы от 9 февраля 2005 года № 6 "О признании утратившей +силу статьи 23 Закона города Москвы от 24 января 1996 года № 1 "Об +административной ответственности за правонарушения в сфере благоустройства +города"; + + 22) Закон города Москвы от 13 апреля 2005 года № 11 "О внесении изменений в +Закон города Москвы от 19 мая 2004 года № 34 "Об административной +ответственности за попустительство нахождению несовершеннолетних в общественных +местах в ночное время 
без сопровождения своих законных представителей"; + + 23) Закон города Москвы от 22 июня 2005 года № 25 "О внесении изменений в +Закон города Москвы от 8 июля 1999 года № 29 "Об административной +ответственности за попустительство незаконному обороту или незаконному +потреблению наркотических средств или психотропных веществ"; + + 24) Закон города Москвы от 18 января 2006 года № 3 "О внесении изменений в +Закон города Москвы от 20 декабря 1995 года № 26 "Об административной +ответственности за нарушение нормативов Москвы по эксплуатации жилищного фонда"; + + 25) Закон города Москвы от 18 января 2006 года № 5 "О внесении изменений в +отдельные законы города Москвы в области охраны окружающей среды"; + + 26) Закон города Москвы от 4 октября 2006 года № 48 "О внесении изменений в +статью 11 Закона города Москвы от 24 января 1996 года № 1 "Об административной +ответственности за правонарушения в сфере благоустройства города" и статью 7 +Закона города Москвы от 14 июля 2004 года № 49 "Об административной +ответственности за правонарушения в области содержания и сохранности подземных +инженерных коммуникаций и сооружений"; + + 27) Закон города Москвы от 11 июля 2007 года № 33 "О действии законов +города Москвы, регулирующих вопросы административной ответственности, в части +способа выражения денежного взыскания, налагаемого за административное +правонарушение". + + Статья 17.3. О внесении изменений в отдельные законы города Москвы в связи +с вступлением в силу настоящего Кодекса + + 1. 
Внести в Закон города Москвы от 20 декабря 1995 года № 26 "Об +административной ответственности за нарушение нормативов Москвы по эксплуатации +жилищного фонда" следующие изменения: + + 1) название Закона изложить в следующей редакции: + + "О контроле за соблюдением нормативов Москвы по эксплуатации жилищного +фонда"; + + 2) в преамбуле слова "административную ответственность за нарушение" +заменить словами "правовые основы контроля за соблюдением"; + + 3) статьи 2 и 3 изложить в следующей редакции: + + "Статья 2. Контроль за соблюдением нормативов Москвы по + + эксплуатации жилищного фонда + + Контроль за соблюдением нормативов Москвы по эксплуатации жилищного фонда +независимо от его ведомственной принадлежности и форм собственности +осуществляется уполномоченным органом исполнительной власти города Москвы по +государственному контролю за порядком целевого использования и технической +эксплуатации жилья (далее - орган государственного контроля). + + Статья 3. Документы органа государственного контроля + + В связи с проведением конкретного мероприятия по контролю и выявлением при +этом административных нарушений органом государственного контроля в +установленном порядке оформляются следующие документы: распоряжение, +уведомление, акт, предписание. + + Распоряжение - распорядительный документ, являющийся основанием для +проведения уполномоченным должностным лицом (лицами) мероприятия по контролю. + + Уведомление - направляемое юридическому лицу сообщение о проведении в +установленный срок мероприятия по контролю, содержащее требование об обеспечении +доступа к объектам контроля и перечень документов, подлежащих представлению в +обязательном порядке должностному лицу (лицам), уполномоченному (уполномоченным) +на проведение мероприятия по контролю. + + Акт - документ о результатах проведения мероприятия по контролю, +составляемый уполномоченным должностным лицом (лицами). 
+ + Предписание - обязательное для исполнения требование об устранении +выявленных нормативных нарушений, выдаваемое физическому или юридическому лицу +(его представителю) должностным лицом (лицами), осуществляющим (осуществляющими) +мероприятие по контролю. + + Форма официальных документов и их содержание устанавливаются руководителем +органа государственного контроля."; + + 4) статьи 4, 6-8 исключить. + + 2. Внести в Закон города Москвы от 5 ноября 1997 года № 46 "О защите +населения и территорий города от чрезвычайных ситуаций природного и техногенного +характера" следующие изменения: + + 1) статью 24 изложить в следующей редакции: + + "Статья 24. Ответственность за невыполнение требований +законодательства по вопросам предупреждения, защиты населения и территорий от +чрезвычайных ситуаций + + За невыполнение установленных законодательством города Москвы требований по +вопросам предупреждения, защиты населения и территорий от чрезвычайных ситуаций +должностные и юридические лица несут административную ответственность в +соответствии с Кодексом города Москвы об административных правонарушениях."; + + 2) статью 25 исключить. + + 3. Внести в Закон города Москвы от 5 мая 1999 года № 17 "О защите зеленых +насаждений" следующие изменения: + + 1) в абзаце четвертом части второй статьи 11 слова "главой 6 настоящего +Закона" заменить словами "Кодексом города Москвы об административных +правонарушениях"; + + 2) статью 18 изложить в следующей редакции: + + "Статья 18. Административная ответственность за нарушение требований по +защите зеленых насаждений + + За нарушение требований по защите зеленых насаждений граждане, должностные +и юридические лица несут административную ответственность в соответствии с +Кодексом города Москвы об административных правонарушениях. + + Наложение административного штрафа не освобождает виновных лиц от +обязанности возместить причиненный ими вред."; + + 3) статьи 19-25 исключить. + + 4. 
Внести в Закон города Москвы от 30 июня 1999 года № 28 "О регулировании +использования редких и исчезающих диких животных и растений на территории города +Москвы" следующие изменения: + + 1) в статье 5: + + а) часть третью исключить; + + б) дополнить частью следующего содержания: + + "Наложение административного штрафа не освобождает виновных от обязанности +выполнить предписания по устранению нарушения и возместить причиненный ими +вред."; + + 2) статьи 7 и 9 исключить. + + 5. Статью 8 Закона города Москвы от 17 января 2001 года № 3 "Об обеспечении +беспрепятственного доступа инвалидов к объектам социальной, транспортной и +инженерной инфраструктур города Москвы" изложить в следующей редакции: + + "Статья 8. Ответственность за нарушение настоящего Закона + + За невыполнение требований по обеспечению беспрепятственного доступа +инвалидов и иных маломобильных граждан к объектам социальной, транспортной и +инженерной инфраструктур должностные и юридические лица несут административную +ответственность в соответствии с Кодексом города Москвы об административных +правонарушениях. + + Неоднократное нарушение норм настоящего Закона в период строительства и +проведения ремонтных работ объектов и территорий юридическим лицом, а также +нарушение, совершенное в течение одного года с момента наложения штрафа, может +явиться основанием ликвидации юридического лица в соответствии с требованиями +пункта 2 статьи 61 Гражданского кодекса Российской Федерации.". + + 6. Пункты 5 и 27 статьи 1 Закона города Москвы от 21 февраля 2001 года № 6 +"О приведении в соответствие с Бюджетным кодексом Российской Федерации отдельных +законов города Москвы" исключить. + + 7. 
Внести в Закон города Москвы от 26 сентября 2001 года № 48 "Об особо +охраняемых природных территориях в городе Москве" следующие изменения: + + 1) в части 7 статьи 7 слова "настоящим Законом" заменить словами "Кодексом +города Москвы об административных правонарушениях"; + + 2) в главе 3: + + а) название дополнить словами "и ответственность за нарушение +законодательства города Москвы об особо охраняемых природных территориях"; + + б) абзац девятнадцатый части 2 статьи 30 изложить в следующей редакции: + + "- въезд, передвижение либо размещение транспортного средства на особо +охраняемой природной территории в нарушение порядка, установленного +Правительством Москвы."; + + в) дополнить статьей 33.1 следующего содержания: + + "Статья 33.1. Ответственность за нарушение законодательства города Москвы +об особо охраняемых природных территориях + + Нарушение законодательства города Москвы об особо охраняемых природных +территориях влечет административную ответственность в соответствии с Кодексом +города Москвы об административных правонарушениях."; + + 3) главу 4 исключить. + + 8. (Утратил силу - Закон Москвы от 03.06.2009 г. № 17) + + 9. Внести в Закон города Москвы от 12 июля 2002 года № 42 "Об +административной ответственности за нарушение покоя граждан и тишины в ночное +время в городе Москве" следующие изменения: + + 1) название Закона изложить в следующей редакции: + + "О соблюдении покоя граждан и тишины в ночное время в городе Москве"; + + 2) преамбулу исключить; + + 3) абзац второй статьи 1 изложить в следующей редакции: + + "Ночное время - период времени с 23 часов до 7 часов."; + + 4) статью 3 изложить в следующей редакции: + + "Статья 3. Административная ответственность за нарушение покоя граждан и +тишины + + Нарушение покоя граждан и тишины в ночное время в городе Москве влечет +административную ответственность в соответствии с Кодексом города Москвы об +административных правонарушениях."; + + 5) статьи 4-10 исключить. + + 10. 
(Утратил силу - Закон Москвы от 03.06.2009 г. № 17) + + 11. Пункты 8 и 9 статьи 1 Закона города Москвы от 21 мая 2003 года № 29 "О +внесении изменений в Закон города Москвы от 5 ноября 1997 года № 46 "О защите +населения и территорий города от чрезвычайных ситуаций природного и техногенного +характера" исключить. + + 12. Внести в статью 1 Закона города Москвы от 17 марта 2004 года № 12 "О +приведении в соответствие с Кодексом Российской Федерации об административных +правонарушениях отдельных законов города Москвы" следующие изменения: + + 1) подпункты "г", "е", "з", "и" пункта 2 исключить; + + 2) подпункты "г", "ж", "з" пункта 3 исключить; + + 3) пункты 4 и 5 исключить. + + 13. (Утратила силу - Закон Москвы от 03.04.2013 г. № 13) + + 14. Часть 4 статьи 15 Закона города Москвы от 14 июля 2004 года № 50 "О +порядке наделения органов местного самоуправления внутригородских муниципальных +образований в городе Москве отдельными полномочиями города Москвы +(государственными полномочиями)" изложить в следующей редакции: + + "4. Неисполнение или ненадлежащее исполнение обязанностей по реализации +отдельных государственных полномочий органами местного самоуправления влечет +административную ответственность в соответствии с Кодексом города Москвы об +административных правонарушениях.". + + 15. Внести в Закон города Москвы от 20 октября 2004 года № 65 "Об +экологическом мониторинге в городе Москве" следующие изменения: + + 1) статью 2 изложить в следующей редакции: + + "Статья 2. Законодательство города Москвы в области экологического +мониторинга + + 1. Законодательство города Москвы в области экологического мониторинга +основывается на положениях Конституции Российской Федерации, федерального +законодательства об охране окружающей среды, Устава города Москвы и состоит из +настоящего Закона и принимаемых в соответствии с ним иных законов и нормативных +правовых актов города Москвы. + + 2. 
Нарушение требований законодательства города Москвы в области +экологического мониторинга влечет административную ответственность в +соответствии с Кодексом города Москвы об административных правонарушениях."; + + 2) главу 6 исключить. + + 16. Статьи 2 и 3 Закона города Москвы от 24 ноября 2004 года № 79 "О +внесении изменений в отдельные законы города Москвы в области охраны окружающей +среды" исключить. + + 17. Внести в Закон города Москвы от 2 марта 2005 года № 9 "О комплексном +природопользовании в городе Москве" следующие изменения: + + 1) в преамбуле слова ", а также устанавливает ответственность за нарушение +законодательства в области комплексного природопользования" исключить; + + 2) статью 2 дополнить частью 3 следующего содержания: + + "3. Нарушение требований законодательства города Москвы в области +комплексного природопользования влечет административную ответственность в +соответствии с Кодексом города Москвы об административных правонарушениях."; + + 3) главу 5 исключить. + + 18. Внести в Закон города Москвы от 30 ноября 2005 года № 68 "Об отходах +производства и потребления в городе Москве" следующие изменения: + + 1) статьи 20-23 изложить в следующей редакции: + + "Статья 20. Государственный контроль за деятельностью в области обращения с +отходами + + Государственный контроль за деятельностью в области обращения с отходами +осуществляется в рамках государственного контроля в области охраны окружающей +среды на объектах хозяйственной и иной деятельности, за исключением объектов +хозяйственной и иной деятельности, подлежащих федеральному государственному +экологическому контролю, органом исполнительной власти города Москвы, +осуществляющим государственное управление в области охраны окружающей среды. + + Статья 21. Производственный контроль в области обращения с отходами + + 1. 
Субъекты хозяйственной и иной деятельности, осуществляющие деятельность +в области обращения с отходами, организуют и осуществляют производственный +контроль за соблюдением требований федерального законодательства и +законодательства города Москвы в области обращения с отходами. + + 2. Порядок осуществления производственного контроля в области обращения с +отходами определяется юридическими лицами и индивидуальными предпринимателями, +осуществляющими деятельность в области обращения с отходами, по согласованию с +органом исполнительной власти города Москвы, осуществляющим государственное +управление в области охраны окружающей среды, в соответствии с его компетенцией. + + Статья 22. Административная ответственность в области обращения с отходами + + Нарушение установленных правовыми актами города Москвы требований в области +обращения с отходами в городе Москве влечет административную ответственность в +соответствии с Кодексом города Москвы об административных правонарушениях. + + Статья 23. Разрешение споров в области обращения с отходами + + Споры в области обращения с отходами разрешаются в судебном порядке."; + + 2) статьи 24-27 исключить. + + 19. Части 7-10 статьи 1 Закона города Москвы от 4 июля 2007 года № 30 "О +внесении изменений в Закон города Москвы от 20 октября 2004 года № 65 "Об +экологическом мониторинге в городе Москве" исключить. + + 20. Внести в Закон города Москвы от 4 июля 2007 года № 31 "О городских +почвах" следующие изменения: + + 1) статью 24 изложить в следующей редакции: + + "Статья 24. Административная ответственность за нарушения в области охраны +и рационального использования городских почв + + Нарушения в области охраны и рационального использования городских почв +влекут административную ответственность в соответствии с Кодексом города Москвы +об административных правонарушениях."; + + 2) статьи 25-28 исключить. + + 21. 
Внести в Закон города Москвы от 4 июля 2007 года № 32 "О внесении +изменений в отдельные законы города Москвы в области охраны окружающей среды" +следующие изменения: + + 1) части 2-10 статьи 1 исключить; + + 2) статью 2 исключить; + + 3) части 2-7 статьи 3 исключить; + + 4) части 2-22 статьи 4 исключить; + + 5) статьи 5 и 6 исключить. + + 22. Внести в Закон города Москвы от 11 июля 2007 года № 35 "О мониторинге +особых объектов нежилого фонда города Москвы" следующие изменения: + + 1) статью 6 изложить в следующей редакции: + + "Статья 6. Административная ответственность за нарушение требований к +содержанию особых объектов + + Нарушение установленных правовыми актами города Москвы требований к +содержанию особых объектов влечет административную ответственность в +соответствии с Кодексом города Москвы об административных правонарушениях."; + + 2) статьи 7-11 исключить. + + Статья 17.4. Заключительные положения + + 1. Дела об административных правонарушениях, совершенных до вступления в +силу настоящего Кодекса, рассматриваются и административные наказания +назначаются в соответствии с ранее действовавшими статьями законов города +Москвы, предусматривавших административную ответственность, за исключением +случаев, когда настоящим Кодексом смягчается или отменяется ответственность за +соответствующее административное правонарушение. + + 2. Предложить Мэру Москвы, Правительству Москвы, отраслевым и +функциональным органам исполнительной власти города Москвы привести до 1 июля +2008 года свои правовые акты об административных правонарушениях в соответствие +с настоящим Кодексом. + + П.п. 
Мэр Москвы Ю.М.Лужков + + Москва, Московская + + городская Дума + + 21 ноября 2007 года + + N 45 + diff --git a/labeling/tests/data/tables_archive.zip b/labeling/tests/data/tables_archive.zip new file mode 100644 index 00000000..4ba2dfd4 Binary files /dev/null and b/labeling/tests/data/tables_archive.zip differ diff --git a/tests/data/taskers/bboxes.jsonlines b/labeling/tests/data/taskers/bboxes.jsonlines similarity index 100% rename from tests/data/taskers/bboxes.jsonlines rename to labeling/tests/data/taskers/bboxes.jsonlines diff --git a/tests/data/taskers/images/img_7e292eeb-3e36-11eb-b625-378a51799f07_000001.png b/labeling/tests/data/taskers/images/img_7e292eeb-3e36-11eb-b625-378a51799f07_000001.png similarity index 100% rename from tests/data/taskers/images/img_7e292eeb-3e36-11eb-b625-378a51799f07_000001.png rename to labeling/tests/data/taskers/images/img_7e292eeb-3e36-11eb-b625-378a51799f07_000001.png diff --git a/tests/data/taskers/images/img_7e5b36e1-3e36-11eb-b625-378a51799f07_000002.png b/labeling/tests/data/taskers/images/img_7e5b36e1-3e36-11eb-b625-378a51799f07_000002.png similarity index 100% rename from tests/data/taskers/images/img_7e5b36e1-3e36-11eb-b625-378a51799f07_000002.png rename to labeling/tests/data/taskers/images/img_7e5b36e1-3e36-11eb-b625-378a51799f07_000002.png diff --git a/tests/data/taskers/lines.jsonlines b/labeling/tests/data/taskers/lines.jsonlines similarity index 100% rename from tests/data/taskers/lines.jsonlines rename to labeling/tests/data/taskers/lines.jsonlines diff --git a/tests/data/taskers/test_config.json b/labeling/tests/data/taskers/test_config.json similarity index 100% rename from tests/data/taskers/test_config.json rename to labeling/tests/data/taskers/test_config.json diff --git a/tests/data/taskers/test_manifest.md b/labeling/tests/data/taskers/test_manifest.md similarity index 100% rename from tests/data/taskers/test_manifest.md rename to labeling/tests/data/taskers/test_manifest.md diff --git 
a/labeling/tests/run_tests_in_docker.sh b/labeling/tests/run_tests_in_docker.sh new file mode 100755 index 00000000..40ef206c --- /dev/null +++ b/labeling/tests/run_tests_in_docker.sh @@ -0,0 +1,16 @@ +if [ "$is_test" = "true" ] + then + apt install -y cowsay + echo "run tests" + sleep 5 + python3 -m unittest -v -f /labeling_root/labeling/tests/test* + test_exit_code=$? + if [ $test_exit_code -eq 0 ] + then /usr/games/cowsay "all right" + else /usr/games/cowsay -f dragon "tests failed" + fi + exit "$test_exit_code" +else + echo "skip tests, if you want to run tests do " + echo 'test="true" docker-compose up --build --exit-code-from test' +fi diff --git a/labeling/tests/test_api_taskers.py b/labeling/tests/test_api_taskers.py new file mode 100644 index 00000000..0eabc1e2 --- /dev/null +++ b/labeling/tests/test_api_taskers.py @@ -0,0 +1,102 @@ +import json +import os +import re +import tempfile +import time +import unittest +import zipfile + +import requests + + +class TestApiTaskers(unittest.TestCase): + data_directory_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "data")) + host = os.environ.get("HOST", "localhost") + port = int(os.environ.get("PORT", "1232")) + uid_reg = re.compile(r"\?uid=(.*)\"") + result_archive_reg = re.compile(r"") + + def tearDown(self) -> None: + requests.get(f"http://{self.host}:{self.port}/clear") + + def test_info(self) -> None: + r = requests.get(f"http://{self.host}:{self.port}/") + self.assertEqual(200, r.status_code) + + def test_line_label_tasker(self) -> None: + archive_name = "archive.zip" + self.__test_any_tasker(data=dict(type_of_task="law_classifier", document_type="law"), archive_name=archive_name) + self.__test_any_tasker(data=dict(type_of_task="tz_classifier", document_type="tz"), archive_name=archive_name) + self.__test_any_tasker(data=dict(type_of_task="paragraph_classifier"), archive_name=archive_name) + + def test_filtered_line_label_tasker(self) -> None: + 
self.__test_any_tasker(data=dict(type_of_task="diploma_classifier", document_type="diploma"), archive_name="diploma_archive.zip") + + def test_header_footer_tasker(self) -> None: + self.__test_any_tasker(data=dict(type_of_task="header_classifier"), archive_name="archive.zip") + + def test_table_tasker(self) -> None: + self.__test_any_tasker(data=dict(type_of_task="tables_classifier"), archive_name="tables_archive.zip", test_original_documents=False) + + def __test_any_tasker(self, data: dict, archive_name: str, test_original_documents: bool = True) -> None: + task_uid = self._send_archive(archive_name, data=data) + with tempfile.TemporaryDirectory() as tmp_dir: + result_archive_path = self._get_result_archive(uid=task_uid, tmp_dir=tmp_dir) + + with zipfile.ZipFile(result_archive_path) as archive: + name_list = archive.namelist() + for file_name in ["original_documents.zip", "formInput.html", "formResult.html", "task_manager.py"]: + self.assertIn(file_name, name_list) + archive.extractall(tmp_dir) + + if test_original_documents: + with zipfile.ZipFile(os.path.join(tmp_dir, "original_documents.zip")) as output_archive: + output_archive_extensions = sorted([os.path.splitext(file_name)[-1] for file_name in output_archive.namelist()]) + with zipfile.ZipFile(os.path.join(self.data_directory_path, archive_name)) as input_archive: + input_archive_extensions = sorted([os.path.splitext(file_name)[-1] for file_name in input_archive.namelist()]) + self.assertListEqual(output_archive_extensions, input_archive_extensions, "Documents of the task should contain all input documents") + + task_archive_name = [file_name for file_name in name_list if file_name.startswith("task_") and file_name.endswith(".zip")][0] + with zipfile.ZipFile(os.path.join(tmp_dir, task_archive_name)) as task_archive: + task_name_list = [os.path.basename(file_name) for file_name in task_archive.namelist()] + for file_name in ["config.json", "Dockerfile", "manifest.pdf", "README.md", "run.sh", "tasks.json"]: 
+ self.assertIn(file_name, task_name_list) + task_archive.extractall(tmp_dir) + + task_dir_path = os.path.join(tmp_dir, os.path.splitext(task_archive_name)[0]) + with open(os.path.join(task_dir_path, "tasks.json")) as tasks_file: + tasks_dict = json.load(tasks_file) + for task in tasks_dict.values(): + self.assertTrue(os.path.exists(os.path.join(task_dir_path, task["task_path"])), f'{task["task_path"]} does not exist') + + def _send_archive(self, file_name: str, data: dict = None) -> str: + data = {} if data is None else data + abs_path = os.path.join(self.data_directory_path, file_name) + + with open(abs_path, "rb") as file: + r = requests.post(f"http://{self.host}:{self.port}/upload_archive", files={"file": (file_name, file)}, data=data) + self.assertEqual(201, r.status_code) + + uid_match = self.uid_reg.findall(r.content.decode()) + self.assertEqual(1, len(uid_match)) + return uid_match[0] + + def _get_result_archive(self, uid: str, tmp_dir: str) -> str: + r = requests.get(f"http://{self.host}:{self.port}/get_result_archive?uid={uid}") + self.assertIn(r.status_code, (200, 202)) + + while r.status_code == 202: + r = requests.get(f"http://{self.host}:{self.port}/get_result_archive?uid={uid}") + time.sleep(10) + self.assertEqual(200, r.status_code) + + result_archive_match = self.result_archive_reg.findall(r.content.decode()) + self.assertEqual(1, len(result_archive_match)) + archive_name = result_archive_match[0] + r = requests.get(f"http://{self.host}:{self.port}/return-file/{archive_name}") + self.assertEqual(200, r.status_code) + + archive_out_path = os.path.join(tmp_dir, archive_name) + with open(archive_out_path, "wb") as f: + f.write(r.content) + return archive_out_path diff --git a/labeling/tests/test_images_creators.py b/labeling/tests/test_images_creators.py new file mode 100644 index 00000000..8b37ba3d --- /dev/null +++ b/labeling/tests/test_images_creators.py @@ -0,0 +1,119 @@ +import json +import os +import shutil +import tempfile +import unittest 
+import zipfile +from typing import Optional + +from dedoc.attachments_handler.attachments_handler import AttachmentsHandler +from dedoc.config import get_config +from dedoc.converters.converter_composition import ConverterComposition +from dedoc.dedoc_manager import DedocManager +from dedoc.metadata_extractors.concrete_metadata_extractors.base_metadata_extractor import BaseMetadataExtractor +from dedoc.metadata_extractors.metadata_extractor_composition import MetadataExtractorComposition +from dedoc.readers import PdfImageReader, PdfTabbyReader, PdfTxtlayerReader +from dedoc.readers.docx_reader.docx_reader import DocxReader +from dedoc.readers.reader_composition import ReaderComposition +from dedoc.readers.txt_reader.raw_text_reader import RawTextReader +from dedoc.structure_constructors.concrete_structure_constructors.tree_constructor import TreeConstructor +from dedoc.structure_constructors.structure_constructor_composition import StructureConstructorComposition +from dedoc.structure_extractors.concrete_structure_extractors.classifying_law_structure_extractor import ClassifyingLawStructureExtractor +from dedoc.structure_extractors.concrete_structure_extractors.default_structure_extractor import DefaultStructureExtractor +from dedoc.structure_extractors.concrete_structure_extractors.foiv_law_structure_extractor import FoivLawStructureExtractor +from dedoc.structure_extractors.concrete_structure_extractors.law_structure_excractor import LawStructureExtractor +from dedoc.structure_extractors.structure_extractor_composition import StructureExtractorComposition +from dedoc.utils.train_dataset_utils import get_path_original_documents +from train_dataset.taskers.images_creators.concrete_creators.abstract_images_creator import AbstractImagesCreator +from train_dataset.taskers.images_creators.concrete_creators.docx_images_creator import DocxImagesCreator +from train_dataset.taskers.images_creators.concrete_creators.scanned_images_creator import ScannedImagesCreator +from 
train_dataset.taskers.images_creators.concrete_creators.txt_images_creator import TxtImagesCreator + + +class TestImagesCreators(unittest.TestCase): + config = get_config() + config["labeling_mode"] = True + path2docs = get_path_original_documents(config) + + def setUp(self) -> None: + self.path2docs = get_path_original_documents(self.config) + + def tearDown(self) -> None: + lines_path = os.path.join(self.config["intermediate_data_path"], "lines.jsonlines") + if os.path.exists(lines_path): + os.remove(lines_path) + + if os.path.exists(self.path2docs): + shutil.rmtree(self.path2docs) + + def test_docx_images_creator(self) -> None: + docx_images_creator = DocxImagesCreator(self.path2docs, config=self.config) + self.__test_images_creator(images_creator=docx_images_creator, doc_name="english_doc.docx", num_images=3) + + def test_txt_images_creator(self) -> None: + txt_images_creator = TxtImagesCreator(self.path2docs, config=self.config) + self.__test_images_creator(images_creator=txt_images_creator, doc_name="txt_example.txt", num_images=7) + + def test_scanned_images_creator(self) -> None: + scanned_images_creator = ScannedImagesCreator(self.path2docs) + self.__test_images_creator( + images_creator=scanned_images_creator, + doc_name="english_doc.pdf", + num_images=3, + parameters=dict(document_type="law", pdf_with_text_layer="true") + ) + self.__test_images_creator( + images_creator=scanned_images_creator, + doc_name="english_doc.pdf", + num_images=3, + parameters=dict(document_type="law", pdf_with_text_layer="tabby") + ) + self.__test_images_creator(images_creator=scanned_images_creator, doc_name="law_image.png", num_images=34) + + def __test_images_creator(self, images_creator: AbstractImagesCreator, doc_name: str, num_images: int, parameters: Optional[dict] = None) -> None: + parameters = dict(document_type="law") if parameters is None else parameters + files_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "data", "images_creators")) + test_manager = 
DedocManager(manager_config=self.__create_test_manager_config(self.config), config=self.config) + + with tempfile.TemporaryDirectory() as tmp_dir: + with zipfile.ZipFile(os.path.join(tmp_dir, "archive.zip"), "w") as archive: + _ = test_manager.parse(file_path=os.path.join(files_dir, doc_name), parameters=parameters) + lines_path = os.path.join(self.config["intermediate_data_path"], "lines.jsonlines") + self.assertTrue(os.path.isfile(lines_path)) + with open(lines_path, "r") as f: + lines = [json.loads(line) for line in f] + original_doc = lines[0]["original_document"] + path = os.path.join(get_path_original_documents(self.config), original_doc) + self.assertTrue(os.path.isfile(path), f"{path} does not exist") + self.assertTrue(images_creator.can_read(lines)) + images_creator.add_images(page=lines, archive=archive) + self.assertEqual(len(archive.namelist()), num_images) + os.remove(lines_path) + os.remove(path) + + def __create_test_manager_config(self, config: dict) -> dict: + readers = [ + DocxReader(config=config), + RawTextReader(config=config), + PdfTxtlayerReader(config=config), + PdfTabbyReader(config=config), + PdfImageReader(config=config) + ] + metadata_extractors = [BaseMetadataExtractor()] + law_extractors = { + FoivLawStructureExtractor.document_type: FoivLawStructureExtractor(config=config), + LawStructureExtractor.document_type: LawStructureExtractor(config=config) + } + structure_extractors = { + DefaultStructureExtractor.document_type: DefaultStructureExtractor(), + ClassifyingLawStructureExtractor.document_type: ClassifyingLawStructureExtractor(extractors=law_extractors, config=config) + } + + return dict( + converter=ConverterComposition(converters=[]), + reader=ReaderComposition(readers=readers), + structure_extractor=StructureExtractorComposition(extractors=structure_extractors, default_key="other"), + structure_constructor=StructureConstructorComposition(default_constructor=TreeConstructor(), constructors={"tree": TreeConstructor()}), + 
document_metadata_extractor=MetadataExtractorComposition(extractors=metadata_extractors), + attachments_handler=AttachmentsHandler(config=config) + ) diff --git a/tests/unit_tests/test_misc_line_extractor.py b/labeling/tests/test_line_extractor.py similarity index 70% rename from tests/unit_tests/test_misc_line_extractor.py rename to labeling/tests/test_line_extractor.py index eec1cfeb..27b8bcde 100644 --- a/tests/unit_tests/test_misc_line_extractor.py +++ b/labeling/tests/test_line_extractor.py @@ -2,17 +2,16 @@ import os import unittest -from dedoc.train_dataset.extractors.line_with_meta_extractor import LineWithMetaExtractor -from tests.test_utils import get_test_config +from dedoc.config import get_config +from train_dataset.extractors.line_with_meta_extractor import LineWithMetaExtractor class TestLineWithMetaExtractor(unittest.TestCase): def test_txt_file(self) -> None: - config = get_test_config() - documents_path = os.path.join(os.path.dirname(__file__), "..", "data", "laws") - documents_path = os.path.abspath(documents_path) - self.assertTrue(os.path.isdir(documents_path)) + config = get_config() + config["labeling_mode"] = True + documents_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "data", "laws")) path = os.path.join(documents_path, "law_classifier_000000_Bhw.json") self.assertTrue(os.path.isfile(path)) extractor = LineWithMetaExtractor(path=path, documents_path=documents_path, config=config) diff --git a/labeling/tests/test_taskers.py b/labeling/tests/test_taskers.py new file mode 100644 index 00000000..511916ac --- /dev/null +++ b/labeling/tests/test_taskers.py @@ -0,0 +1,84 @@ +import os +import tempfile +import unittest +from zipfile import ZipFile + +from PIL import Image + +from dedoc.config import get_config +from train_dataset.taskers.concrete_taskers.line_label_tasker import LineLabelTasker +from train_dataset.taskers.tasker import Tasker + + +class TestTaskers(unittest.TestCase): + base_path = 
os.path.abspath(os.path.join(os.path.dirname(__file__), "data", "taskers")) + path2lines = os.path.join(base_path, "lines.jsonlines") + path2docs = os.path.join(base_path, "images") + manifest_path = os.path.join(base_path, "test_manifest.md") + config_path = os.path.join(base_path, "test_config.json") + config = get_config() + config["labeling_mode"] = True + + def test_paths(self) -> None: + self.assertTrue(os.path.isfile(self.path2lines), self.path2lines) + self.assertTrue(os.path.isfile(self.manifest_path), self.manifest_path) + self.assertTrue(os.path.isfile(self.config_path), self.config_path) + self.assertTrue(os.path.isdir(self.path2docs), self.path2docs) + + def test_line_label_tasker_size1(self) -> None: + tasker = self._get_line_label_classifier() + task_cnt = 0 + for task_path in tasker.create_tasks(task_size=10): + task_cnt += 1 + self._test_task_archive(task_path) + + def test_line_label_tasker_size2(self) -> None: + tasker = self._get_line_label_classifier() + + task_cnt = 0 + for task_path in tasker.create_tasks(task_size=2): + task_cnt += 1 + self._test_task_archive(task_path) + + def test_tasker(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + taskers = { + "law_classifier": LineLabelTasker( + path2lines=self.path2lines, + path2docs=self.path2docs, + manifest_path=self.manifest_path, + config_path=self.config_path, + tmp_dir=tmpdir, + config=self.config + ) + } + tasker = Tasker(line_info_path=self.path2lines, images_path=self.path2docs, save_path=tmpdir, concrete_taskers=taskers, config=self.config) + tasks_path, task_size = tasker.create_tasks(type_of_task="law_classifier", task_size=1) + self.assertTrue(os.path.isfile(tasks_path)) + self.assertEqual(1, task_size) + with ZipFile(tasks_path) as archive: + self.assertIn("original_documents.zip", archive.namelist()) + + def _get_line_label_classifier(self) -> LineLabelTasker: + tasker = LineLabelTasker( + path2lines=self.path2lines, + path2docs=self.path2docs, + 
manifest_path=self.manifest_path, + config_path=self.config_path, + tmp_dir="/tmp/tasker_test", + config=self.config + ) + return tasker + + def _test_task_archive(self, task_path: str) -> None: + self.assertTrue(os.path.isfile(task_path)) + with ZipFile(task_path) as archive: + namelist = [name.split("/", maxsplit=1)[-1] for name in archive.namelist()] + self.assertIn("test_config.json", namelist) + self.assertIn("test_manifest.md", namelist) + images_paths = [image for image in archive.namelist() if "_img_bbox_" in image] + self.assertTrue(len(images_paths) > 0) + for image_path in images_paths: + with archive.open(image_path) as image_file: + image = Image.open(image_file) + self.assertEqual((1276, 1754), image.size) diff --git a/dedoc/api/train_dataset/__init__.py b/labeling/train_dataset/__init__.py similarity index 100% rename from dedoc/api/train_dataset/__init__.py rename to labeling/train_dataset/__init__.py diff --git a/dedoc/scripts/train/__init__.py b/labeling/train_dataset/api/__init__.py similarity index 100% rename from dedoc/scripts/train/__init__.py rename to labeling/train_dataset/api/__init__.py diff --git a/dedoc/api/train_dataset/train_dataset_api.py b/labeling/train_dataset/api/api.py similarity index 81% rename from dedoc/api/train_dataset/train_dataset_api.py rename to labeling/train_dataset/api/api.py index 4493b51d..bc427504 100644 --- a/dedoc/api/train_dataset/train_dataset_api.py +++ b/labeling/train_dataset/api/api.py @@ -12,17 +12,16 @@ from starlette.templating import Jinja2Templates from dedoc.api.api_args import QueryParameters -from dedoc.api.dedoc_api import _get_static_file_path -from dedoc.api.train_dataset.async_archive_handler import AsyncHandler from dedoc.config import get_config from dedoc.dedoc_manager import DedocManager -from dedoc.train_dataset.taskers.concrete_taskers.filtered_line_label_tasker import FilteredLineLabelTasker -from dedoc.train_dataset.taskers.concrete_taskers.header_footer_tasker import 
HeaderFooterTasker -from dedoc.train_dataset.taskers.concrete_taskers.line_label_tasker import LineLabelTasker -from dedoc.train_dataset.taskers.concrete_taskers.table_tasker import TableTasker -from dedoc.train_dataset.taskers.tasker import Tasker -from dedoc.train_dataset.train_dataset_utils import get_path_original_documents +from dedoc.utils.train_dataset_utils import get_path_original_documents from dedoc.utils.utils import calculate_file_hash +from train_dataset.api.async_archive_handler import AsyncHandler +from train_dataset.taskers.concrete_taskers.filtered_line_label_tasker import FilteredLineLabelTasker +from train_dataset.taskers.concrete_taskers.header_footer_tasker import HeaderFooterTasker +from train_dataset.taskers.concrete_taskers.line_label_tasker import LineLabelTasker +from train_dataset.taskers.concrete_taskers.table_tasker import TableTasker +from train_dataset.taskers.tasker import Tasker @dataclass @@ -37,26 +36,25 @@ class TrainDatasetParameters(QueryParameters): config = get_config() -PORT = config["api_port"] +config["labeling_mode"] = True +PORT = int(os.environ.get("LABELING_PORT", "1232")) +config["api_port"] = PORT -static_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "web") +static_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "web") static_files_dirs = config.get("static_files_dirs") +UPLOAD_FOLDER = os.path.join(config["resources_path"], "tasks") +train_resources_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), "..", "..", "resources") + logger = config.get("logger", logging.getLogger()) app = FastAPI() app.mount("/web", StaticFiles(directory=static_path), name="web") -templates = Jinja2Templates(directory=os.path.join(static_path, "train_dataset")) +templates = Jinja2Templates(directory=static_path) manager = DedocManager(config=config) - -project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..")) -UPLOAD_FOLDER = 
os.path.join(config["resources_path"], "tasks") -train_resources_path = os.path.join(project_root, "resources", "train_dataset") - path2lines = os.path.join(config["intermediate_data_path"], "lines.jsonlines") -boxes_path = os.path.join(config["intermediate_data_path"], "bboxes.jsonlines") progress_bar = {} label2label_law = { @@ -92,7 +90,6 @@ class TrainDatasetParameters(QueryParameters): taskers = { "law_classifier": LineLabelTasker( - path2bboxes=boxes_path, path2lines=path2lines, path2docs=get_path_original_documents(config), manifest_path=os.path.join(train_resources_path, "law", "manifest.pdf"), @@ -102,7 +99,6 @@ class TrainDatasetParameters(QueryParameters): item2label=lambda t: label2label_law.get(t["_metadata"]["hierarchy_level"]["line_type"], "raw_text"), config=config), "paragraph_classifier": LineLabelTasker( - path2bboxes=boxes_path, path2lines=path2lines, path2docs=get_path_original_documents(config), manifest_path=os.path.join(train_resources_path, "paragraph", "manifest.pdf"), @@ -112,7 +108,6 @@ class TrainDatasetParameters(QueryParameters): item2label=lambda t: "not_paragraph", config=config), "tz_classifier": LineLabelTasker( - path2bboxes=boxes_path, path2lines=path2lines, path2docs=get_path_original_documents(config), manifest_path=os.path.join(train_resources_path, "tz", "manifest.pdf"), @@ -122,7 +117,6 @@ class TrainDatasetParameters(QueryParameters): item2label=lambda t: label2label_tz.get(t["_metadata"]["hierarchy_level"]["line_type"], "raw_text"), config=config), "diploma_classifier": FilteredLineLabelTasker( - path2bboxes=boxes_path, path2lines=path2lines, path2docs=get_path_original_documents(config), manifest_path=os.path.join(train_resources_path, "diploma", "manifest.pdf"), @@ -132,7 +126,6 @@ class TrainDatasetParameters(QueryParameters): item2label=lambda t: label2label_diploma.get(t["_metadata"]["hierarchy_level"]["line_type"], "raw_text"), config=config), "header_classifier": HeaderFooterTasker( - path2bboxes=boxes_path, 
path2lines=path2lines, path2docs=get_path_original_documents(config), manifest_path=os.path.join(train_resources_path, "header", "manifest.pdf"), @@ -144,8 +137,7 @@ class TrainDatasetParameters(QueryParameters): "tables_classifier": TableTasker() } -tasker = Tasker(boxes_label_path=os.path.join(config["intermediate_data_path"], "bboxes.jsonlines"), - line_info_path=os.path.join(config["intermediate_data_path"], "lines.jsonlines"), +tasker = Tasker(line_info_path=os.path.join(config["intermediate_data_path"], "lines.jsonlines"), images_path=get_path_original_documents(config), save_path=UPLOAD_FOLDER, concrete_taskers=taskers, @@ -159,7 +151,7 @@ def get_info() -> Response: """ Returns the main page for the labeling mode. """ - return FileResponse(os.path.join(static_path, "train_dataset/info_labeling_mode.html")) + return FileResponse(os.path.join(static_path, "info.html")) @app.get("/handle_archive") @@ -167,7 +159,7 @@ def handle_archive() -> Response: """ Returns the page for running the whole pipeline of task making. 
""" - return FileResponse(os.path.join(static_path, "train_dataset/form_input_archive.html")) + return FileResponse(os.path.join(static_path, "form_input_archive.html")) @app.post("/upload_archive") @@ -203,8 +195,10 @@ def get_result_archive(request: Request, uid: str) -> Response: @app.get("/static_file") def get_static_file(request: Request) -> Response: - path = _get_static_file_path(request) - return FileResponse(path) + file = request.query_params.get("fname") + directory_name = request.query_params.get("directory") + directory = static_files_dirs[directory_name] if directory_name is not None and directory_name in static_files_dirs else static_path + return FileResponse(os.path.abspath(os.path.join(directory, file))) @app.get("/return-file/{filename}") diff --git a/dedoc/api/train_dataset/async_archive_handler.py b/labeling/train_dataset/api/async_archive_handler.py similarity index 97% rename from dedoc/api/train_dataset/async_archive_handler.py rename to labeling/train_dataset/api/async_archive_handler.py index abf4d761..fa1495b7 100644 --- a/dedoc/api/train_dataset/async_archive_handler.py +++ b/labeling/train_dataset/api/async_archive_handler.py @@ -12,7 +12,7 @@ from dedoc.common.exceptions.bad_file_error import BadFileFormatError from dedoc.dedoc_manager import DedocManager -from dedoc.train_dataset.taskers.tasker import Tasker +from train_dataset.taskers.tasker import Tasker class _ArchiveHandler(Thread): @@ -49,6 +49,7 @@ def _handle_archive(self, uid: str, path: str, parameters: dict) -> str: return task except Exception as e: self.progress[uid] = f"Fail with\n{e}" + self.results[uid] = f"Fail with\n{e}" raise e def __handle_one_file(self, archive: zipfile.ZipFile, file: str, parameters: dict) -> None: diff --git a/dedoc/api/web/train_dataset/download.html b/labeling/train_dataset/api/web/download.html similarity index 87% rename from dedoc/api/web/train_dataset/download.html rename to labeling/train_dataset/api/web/download.html index 
3c0ee209..9a51c847 100644 --- a/dedoc/api/web/train_dataset/download.html +++ b/labeling/train_dataset/api/web/download.html @@ -16,7 +16,7 @@

Download from localhost

- wget localhost:1231/return-file/{{ filename }} -O {{ filename }}.part ; mv {{ filename }}.part {{ filename }} + wget localhost:1232/return-file/{{ filename }} -O {{ filename }}.part ; mv {{ filename }}.part {{ filename }}

diff --git a/dedoc/api/web/train_dataset/form_input_archive.html b/labeling/train_dataset/api/web/form_input_archive.html similarity index 91% rename from dedoc/api/web/train_dataset/form_input_archive.html rename to labeling/train_dataset/api/web/form_input_archive.html index d50f5b73..986861e9 100644 --- a/dedoc/api/web/train_dataset/form_input_archive.html +++ b/labeling/train_dataset/api/web/form_input_archive.html @@ -1,8 +1,8 @@ - + - Загрузка файла + Dedoc | upload file @@ -10,7 +10,7 @@
-

Распознавание структуры документа

+

Make tasks for labeling

@@ -46,7 +46,7 @@

Распознавание структуры документа

-

@@ -77,11 +77,11 @@

Распознавание структуры документа

- +
- +
diff --git a/labeling/train_dataset/api/web/info.html b/labeling/train_dataset/api/web/info.html new file mode 100644 index 00000000..52f50d84 --- /dev/null +++ b/labeling/train_dataset/api/web/info.html @@ -0,0 +1,38 @@ + + + + + Dedoc | info page + + + + +
+ +

The process of making datasets and training classifiers

+ +

Step 1 - Making tasks for a labeling system

+
    +
  1. + Run the dedoc labeling service: docker-compose -f labeling/docker-compose.yml up --build +
  2. +
  3. + Prepare the ZIP archive with documents for labeling. The following document formats are supported: TXT, DOCX, PDF, images. +
  4. +
  5. + To make tasks for a labeling system, go here, configure the parameters, and upload the archive. +
  6. +
+ +

Step 2 - Data labeling

+

+ Labeling of the prepared tasks is executed via an external labeling system. +

+ +

Step 3 - Clearing data for labeling

+

+ Here one may clear the data used while making tasks for labeling. +

+
+ + \ No newline at end of file diff --git a/dedoc/train_dataset/__init__.py b/labeling/train_dataset/data_structures/__init__.py similarity index 100% rename from dedoc/train_dataset/__init__.py rename to labeling/train_dataset/data_structures/__init__.py diff --git a/dedoc/train_dataset/data_structures/images_archive.py b/labeling/train_dataset/data_structures/images_archive.py similarity index 100% rename from dedoc/train_dataset/data_structures/images_archive.py rename to labeling/train_dataset/data_structures/images_archive.py diff --git a/dedoc/train_dataset/data_structures/line_with_label.py b/labeling/train_dataset/data_structures/line_with_label.py similarity index 100% rename from dedoc/train_dataset/data_structures/line_with_label.py rename to labeling/train_dataset/data_structures/line_with_label.py diff --git a/dedoc/train_dataset/data_structures/task_item.py b/labeling/train_dataset/data_structures/task_item.py similarity index 100% rename from dedoc/train_dataset/data_structures/task_item.py rename to labeling/train_dataset/data_structures/task_item.py diff --git a/dedoc/train_dataset/data_structures/__init__.py b/labeling/train_dataset/exceptions/__init__.py similarity index 100% rename from dedoc/train_dataset/data_structures/__init__.py rename to labeling/train_dataset/exceptions/__init__.py diff --git a/dedoc/train_dataset/exceptions/empty_page_error.py b/labeling/train_dataset/exceptions/empty_page_error.py similarity index 60% rename from dedoc/train_dataset/exceptions/empty_page_error.py rename to labeling/train_dataset/exceptions/empty_page_error.py index b17924f4..c2b0c98f 100644 --- a/dedoc/train_dataset/exceptions/empty_page_error.py +++ b/labeling/train_dataset/exceptions/empty_page_error.py @@ -1,4 +1,4 @@ -from dedoc.train_dataset.exceptions.train_dataset_error import TrainDatasetError +from train_dataset.exceptions.train_dataset_error import TrainDatasetError class EmptyPageError(TrainDatasetError): diff --git 
a/dedoc/train_dataset/exceptions/task_creation_error.py b/labeling/train_dataset/exceptions/task_creation_error.py similarity index 61% rename from dedoc/train_dataset/exceptions/task_creation_error.py rename to labeling/train_dataset/exceptions/task_creation_error.py index 7f5d7710..5497508d 100644 --- a/dedoc/train_dataset/exceptions/task_creation_error.py +++ b/labeling/train_dataset/exceptions/task_creation_error.py @@ -1,4 +1,4 @@ -from dedoc.train_dataset.exceptions.train_dataset_error import TrainDatasetError +from train_dataset.exceptions.train_dataset_error import TrainDatasetError class TaskCreationError(TrainDatasetError): diff --git a/dedoc/train_dataset/exceptions/train_dataset_error.py b/labeling/train_dataset/exceptions/train_dataset_error.py similarity index 100% rename from dedoc/train_dataset/exceptions/train_dataset_error.py rename to labeling/train_dataset/exceptions/train_dataset_error.py diff --git a/dedoc/train_dataset/exceptions/unknown_task_error.py b/labeling/train_dataset/exceptions/unknown_task_error.py similarity index 71% rename from dedoc/train_dataset/exceptions/unknown_task_error.py rename to labeling/train_dataset/exceptions/unknown_task_error.py index b6257a12..57050266 100644 --- a/dedoc/train_dataset/exceptions/unknown_task_error.py +++ b/labeling/train_dataset/exceptions/unknown_task_error.py @@ -1,4 +1,4 @@ -from dedoc.train_dataset.exceptions.train_dataset_error import TrainDatasetError +from train_dataset.exceptions.train_dataset_error import TrainDatasetError class UnknownTaskError(TrainDatasetError): diff --git a/dedoc/train_dataset/exceptions/__init__.py b/labeling/train_dataset/extractors/__init__.py similarity index 100% rename from dedoc/train_dataset/exceptions/__init__.py rename to labeling/train_dataset/extractors/__init__.py diff --git a/dedoc/train_dataset/extractors/line_with_meta_extractor.py b/labeling/train_dataset/extractors/line_with_meta_extractor.py similarity index 95% rename from 
dedoc/train_dataset/extractors/line_with_meta_extractor.py rename to labeling/train_dataset/extractors/line_with_meta_extractor.py index eee9def0..36caa924 100644 --- a/dedoc/train_dataset/extractors/line_with_meta_extractor.py +++ b/labeling/train_dataset/extractors/line_with_meta_extractor.py @@ -14,10 +14,11 @@ from dedoc.readers.html_reader.html_reader import HtmlReader from dedoc.readers.pdf_reader.data_classes.page_with_bboxes import PageWithBBox from dedoc.readers.pdf_reader.data_classes.text_with_bbox import TextWithBBox +from dedoc.readers.pdf_reader.data_classes.word_with_bbox import WordWithBBox from dedoc.readers.pdf_reader.pdf_image_reader.line_metadata_extractor.metadata_extractor import LineMetadataExtractor from dedoc.readers.txt_reader.raw_text_reader import RawTextReader -from dedoc.train_dataset.data_structures.images_archive import ImagesArchive -from dedoc.train_dataset.data_structures.line_with_label import LineWithLabel +from train_dataset.data_structures.images_archive import ImagesArchive +from train_dataset.data_structures.line_with_label import LineWithLabel class LineWithMetaExtractor: @@ -97,7 +98,7 @@ def __create_bbox(self, data: dict) -> TextWithBBox: return TextWithBBox( bbox=bbox, page_num=data["data"]["bbox"]["page_num"], - text=data["data"]["bbox"]["text"], + words=[WordWithBBox(text=data["data"]["bbox"]["text"], bbox=bbox)], line_num=data["data"]["bbox"]["line_num"], uid=data["data"]["bbox"]["uid"] ) diff --git a/labeling/train_dataset/main.py b/labeling/train_dataset/main.py new file mode 100644 index 00000000..790885e0 --- /dev/null +++ b/labeling/train_dataset/main.py @@ -0,0 +1,4 @@ +from train_dataset.api.api import run_special_api + +if __name__ == "__main__": + run_special_api() diff --git a/dedoc/train_dataset/task_manager.py b/labeling/train_dataset/task_manager.py similarity index 91% rename from dedoc/train_dataset/task_manager.py rename to labeling/train_dataset/task_manager.py index eb8587cf..d60067f2 100644 --- 
a/dedoc/train_dataset/task_manager.py +++ b/labeling/train_dataset/task_manager.py @@ -26,16 +26,16 @@ def get_info() -> str: if len(tasks) > 0: return form_input.format(tasks_left=len(tasks)) else: - return """

Размечать нечего

-

Получить результаты

""" + return """

There is nothing to annotate

+

Get results

""" @app.route("/upload", methods=["POST"]) def upload() -> Union[str, Response]: parameters = {k: v for k, v in request.values.items()} - name = parameters.get("name", "Инкогнито") + name = parameters.get("name", "Unknown") if len(tasks) == 0: - return '

Размечать нечего

' + return '

There is nothing to annotate

' else: task = tasks.pop() with open("task_manager.log", "a") as file_log: @@ -62,7 +62,7 @@ def upload_results() -> Response: path_out = os.path.join(tmp_dir, file_name) archive.extract(member=file_name, path=tmp_dir) _save_result_file(path_out, file_name) - return f"

Результат получен {cnt}

" + return f"

The result has been sent {cnt}

" if request.method == "GET": return form_results diff --git a/dedoc/train_dataset/extractors/__init__.py b/labeling/train_dataset/taskers/__init__.py similarity index 100% rename from dedoc/train_dataset/extractors/__init__.py rename to labeling/train_dataset/taskers/__init__.py diff --git a/dedoc/train_dataset/taskers/__init__.py b/labeling/train_dataset/taskers/concrete_taskers/__init__.py similarity index 100% rename from dedoc/train_dataset/taskers/__init__.py rename to labeling/train_dataset/taskers/concrete_taskers/__init__.py diff --git a/dedoc/train_dataset/taskers/concrete_taskers/abstract_line_label_tasker.py b/labeling/train_dataset/taskers/concrete_taskers/abstract_line_label_tasker.py similarity index 89% rename from dedoc/train_dataset/taskers/concrete_taskers/abstract_line_label_tasker.py rename to labeling/train_dataset/taskers/concrete_taskers/abstract_line_label_tasker.py index 877cd4ac..247b529c 100644 --- a/dedoc/train_dataset/taskers/concrete_taskers/abstract_line_label_tasker.py +++ b/labeling/train_dataset/taskers/concrete_taskers/abstract_line_label_tasker.py @@ -9,15 +9,15 @@ from tempfile import TemporaryDirectory from typing import Callable, Iterable, List -from dedoc.train_dataset.data_structures.images_archive import ImagesArchive -from dedoc.train_dataset.data_structures.task_item import TaskItem -from dedoc.train_dataset.taskers.concrete_taskers.abstract_tasker import AbstractTasker -from dedoc.train_dataset.train_dataset_utils import get_original_document_path +from dedoc.utils.train_dataset_utils import get_original_document_path +from train_dataset.data_structures.images_archive import ImagesArchive +from train_dataset.data_structures.task_item import TaskItem +from train_dataset.taskers.concrete_taskers.abstract_tasker import AbstractTasker class AbstractLineLabelTasker(AbstractTasker): - def __init__(self, path2bboxes: str, + def __init__(self, path2lines: str, path2docs: str, manifest_path: str, @@ -28,8 +28,6 @@ def 
__init__(self, path2bboxes: str, *, config: dict) -> None: """ - - @param path2bboxes: path to file with bboxes @param path2lines: path to file with line with meta information @param path2docs: path to images (or original document) @param manifest_path: path to manifest file (instruction for annotator) @@ -43,7 +41,6 @@ def __init__(self, path2bboxes: str, self.manifest_path = manifest_path self.path2docs = path2docs self.path2lines = path2lines - self.path2bboxes = path2bboxes self.item2label = item2label if item2label is not None else (lambda t: None) self.config = config self.logger = config.get("logger", logging.getLogger()) @@ -133,13 +130,7 @@ def _one_scanned_page(self, page: List[dict], task_archive: zipfile.ZipFile, tas pass def _get_pages(self) -> List[List[dict]]: - bboxes = {bbox["_uid"]: bbox for bbox in self._read_json(self.path2bboxes, required=False)} lines = self._read_json(self.path2lines, required=True) - for line in lines: - line_uid = line["_uid"] - if line_uid in bboxes: - line["bbox"] = bboxes[line_uid] - # line["original_document"] = bboxes[line_uid]["original_image"] pages = defaultdict(list) for line in lines: if "original_document" in line: diff --git a/dedoc/train_dataset/taskers/concrete_taskers/abstract_tasker.py b/labeling/train_dataset/taskers/concrete_taskers/abstract_tasker.py similarity index 87% rename from dedoc/train_dataset/taskers/concrete_taskers/abstract_tasker.py rename to labeling/train_dataset/taskers/concrete_taskers/abstract_tasker.py index 3141f039..82d91649 100644 --- a/dedoc/train_dataset/taskers/concrete_taskers/abstract_tasker.py +++ b/labeling/train_dataset/taskers/concrete_taskers/abstract_tasker.py @@ -7,7 +7,7 @@ class AbstractTasker(ABC): - resources_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..", "resources")) + resources_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", "resources")) @abstractmethod def create_tasks(self, task_size: int, 
tasks_uid: str) -> Iterable[str]: @@ -47,11 +47,11 @@ def _add_docker_files(self, archive: zipfile.ZipFile, task_directory: str, docke @return: archive with docker file """ - docker_path = os.path.join(self.resources_path, "train_dataset", dockerfile_directory, "Dockerfile") + docker_path = os.path.join(self.resources_path, dockerfile_directory, "Dockerfile") archive.write(filename=docker_path, arcname=f"{task_directory}/Dockerfile") - readme_path = os.path.join(self.resources_path, "train_dataset", dockerfile_directory, "README.md") + readme_path = os.path.join(self.resources_path, dockerfile_directory, "README.md") archive.write(filename=readme_path, arcname=f"{task_directory}/README.md") - run_path = os.path.join(self.resources_path, "train_dataset", dockerfile_directory, "run.sh") + run_path = os.path.join(self.resources_path, dockerfile_directory, "run.sh") archive.write(filename=run_path, arcname=f"{task_directory}/run.sh") return archive diff --git a/dedoc/train_dataset/taskers/concrete_taskers/filtered_line_label_tasker.py b/labeling/train_dataset/taskers/concrete_taskers/filtered_line_label_tasker.py similarity index 76% rename from dedoc/train_dataset/taskers/concrete_taskers/filtered_line_label_tasker.py rename to labeling/train_dataset/taskers/concrete_taskers/filtered_line_label_tasker.py index 56d3269d..545e8c7b 100644 --- a/dedoc/train_dataset/taskers/concrete_taskers/filtered_line_label_tasker.py +++ b/labeling/train_dataset/taskers/concrete_taskers/filtered_line_label_tasker.py @@ -3,16 +3,15 @@ from dedoc.data_structures.line_metadata import LineMetadata from dedoc.data_structures.line_with_meta import LineWithMeta from dedoc.structure_extractors.feature_extractors.toc_feature_extractor import TOCFeatureExtractor -from dedoc.train_dataset.taskers.concrete_taskers.line_label_tasker import LineLabelTasker -from dedoc.train_dataset.taskers.images_creators.concrete_creators.docx_images_creator import DocxImagesCreator -from 
dedoc.train_dataset.taskers.images_creators.concrete_creators.scanned_images_creator import ScannedImagesCreator -from dedoc.train_dataset.taskers.images_creators.image_creator_composition import ImageCreatorComposition +from train_dataset.taskers.concrete_taskers.line_label_tasker import LineLabelTasker +from train_dataset.taskers.images_creators.concrete_creators.docx_images_creator import DocxImagesCreator +from train_dataset.taskers.images_creators.concrete_creators.scanned_images_creator import ScannedImagesCreator +from train_dataset.taskers.images_creators.image_creator_composition import ImageCreatorComposition class FilteredLineLabelTasker(LineLabelTasker): def __init__(self, - path2bboxes: str, path2lines: str, path2docs: str, manifest_path: str, @@ -22,7 +21,7 @@ def __init__(self, item2label: Callable = None, *, config: dict) -> None: - super().__init__(path2bboxes, path2lines, path2docs, manifest_path, config_path, tmp_dir, progress_bar, item2label, config=config) + super().__init__(path2lines, path2docs, manifest_path, config_path, tmp_dir, progress_bar, item2label, config=config) # we can use page numbers only in pdf self.images_creators = ImageCreatorComposition(creators=[ ScannedImagesCreator(path2docs=self.path2docs), diff --git a/dedoc/train_dataset/taskers/concrete_taskers/header_footer_tasker.py b/labeling/train_dataset/taskers/concrete_taskers/header_footer_tasker.py similarity index 67% rename from dedoc/train_dataset/taskers/concrete_taskers/header_footer_tasker.py rename to labeling/train_dataset/taskers/concrete_taskers/header_footer_tasker.py index 4ecff06b..73e706f0 100644 --- a/dedoc/train_dataset/taskers/concrete_taskers/header_footer_tasker.py +++ b/labeling/train_dataset/taskers/concrete_taskers/header_footer_tasker.py @@ -2,13 +2,13 @@ from collections import defaultdict from typing import Callable, List -from dedoc.train_dataset.data_structures.images_archive import ImagesArchive -from dedoc.train_dataset.data_structures.task_item 
import TaskItem -from dedoc.train_dataset.taskers.concrete_taskers.line_label_tasker import LineLabelTasker -from dedoc.train_dataset.taskers.images_creators.concrete_creators.docx_images_creator import DocxImagesCreator -from dedoc.train_dataset.taskers.images_creators.concrete_creators.scanned_images_creator import ScannedImagesCreator -from dedoc.train_dataset.taskers.images_creators.concrete_creators.txt_images_creator import TxtImagesCreator -from dedoc.train_dataset.taskers.images_creators.image_creator_composition import ImageCreatorComposition +from train_dataset.data_structures.images_archive import ImagesArchive +from train_dataset.data_structures.task_item import TaskItem +from train_dataset.taskers.concrete_taskers.line_label_tasker import LineLabelTasker +from train_dataset.taskers.images_creators.concrete_creators.docx_images_creator import DocxImagesCreator +from train_dataset.taskers.images_creators.concrete_creators.scanned_images_creator import ScannedImagesCreator +from train_dataset.taskers.images_creators.concrete_creators.txt_images_creator import TxtImagesCreator +from train_dataset.taskers.images_creators.image_creator_composition import ImageCreatorComposition class HeaderFooterTasker(LineLabelTasker): @@ -16,7 +16,6 @@ class HeaderFooterTasker(LineLabelTasker): bottom_lines_count = 3 def __init__(self, - path2bboxes: str, path2lines: str, path2docs: str, manifest_path: str, @@ -26,7 +25,7 @@ def __init__(self, item2label: Callable = None, *, config: dict) -> None: - super().__init__(path2bboxes, path2lines, path2docs, manifest_path, config_path, tmp_dir, progress_bar, item2label, config=config) + super().__init__(path2lines, path2docs, manifest_path, config_path, tmp_dir, progress_bar, item2label, config=config) self.images_creators = ImageCreatorComposition(creators=[ ScannedImagesCreator(path2docs=self.path2docs), DocxImagesCreator(path2docs=self.path2docs, config=config), diff --git 
a/dedoc/train_dataset/taskers/concrete_taskers/line_label_tasker.py b/labeling/train_dataset/taskers/concrete_taskers/line_label_tasker.py similarity index 74% rename from dedoc/train_dataset/taskers/concrete_taskers/line_label_tasker.py rename to labeling/train_dataset/taskers/concrete_taskers/line_label_tasker.py index fe9306e6..2ffb0468 100644 --- a/dedoc/train_dataset/taskers/concrete_taskers/line_label_tasker.py +++ b/labeling/train_dataset/taskers/concrete_taskers/line_label_tasker.py @@ -3,19 +3,18 @@ from io import BytesIO from typing import Callable, List -from dedoc.train_dataset.data_structures.images_archive import ImagesArchive -from dedoc.train_dataset.data_structures.task_item import TaskItem -from dedoc.train_dataset.taskers.concrete_taskers.abstract_line_label_tasker import AbstractLineLabelTasker -from dedoc.train_dataset.taskers.images_creators.concrete_creators.docx_images_creator import DocxImagesCreator -from dedoc.train_dataset.taskers.images_creators.concrete_creators.scanned_images_creator import ScannedImagesCreator -from dedoc.train_dataset.taskers.images_creators.concrete_creators.txt_images_creator import TxtImagesCreator -from dedoc.train_dataset.taskers.images_creators.image_creator_composition import ImageCreatorComposition +from train_dataset.data_structures.images_archive import ImagesArchive +from train_dataset.data_structures.task_item import TaskItem +from train_dataset.taskers.concrete_taskers.abstract_line_label_tasker import AbstractLineLabelTasker +from train_dataset.taskers.images_creators.concrete_creators.docx_images_creator import DocxImagesCreator +from train_dataset.taskers.images_creators.concrete_creators.scanned_images_creator import ScannedImagesCreator +from train_dataset.taskers.images_creators.concrete_creators.txt_images_creator import TxtImagesCreator +from train_dataset.taskers.images_creators.image_creator_composition import ImageCreatorComposition class LineLabelTasker(AbstractLineLabelTasker): def 
__init__(self, - path2bboxes: str, path2lines: str, path2docs: str, manifest_path: str, @@ -25,7 +24,7 @@ def __init__(self, item2label: Callable = None, *, config: dict) -> None: - super().__init__(path2bboxes, path2lines, path2docs, manifest_path, config_path, tmp_dir, progress_bar, item2label, config=config) + super().__init__(path2lines, path2docs, manifest_path, config_path, tmp_dir, progress_bar, item2label, config=config) self.images_creators = ImageCreatorComposition(creators=[ ScannedImagesCreator(path2docs=self.path2docs), DocxImagesCreator(path2docs=self.path2docs, config=config), diff --git a/dedoc/train_dataset/taskers/concrete_taskers/table_tasker.py b/labeling/train_dataset/taskers/concrete_taskers/table_tasker.py similarity index 85% rename from dedoc/train_dataset/taskers/concrete_taskers/table_tasker.py rename to labeling/train_dataset/taskers/concrete_taskers/table_tasker.py index e6856943..b2506d45 100644 --- a/dedoc/train_dataset/taskers/concrete_taskers/table_tasker.py +++ b/labeling/train_dataset/taskers/concrete_taskers/table_tasker.py @@ -7,11 +7,11 @@ from PIL import Image from dedocutils.data_structures import BBox -from dedoc.train_dataset.data_path_config import table_path -from dedoc.train_dataset.data_structures.task_item import TaskItem -from dedoc.train_dataset.taskers.concrete_taskers.abstract_tasker import AbstractTasker +from dedoc.config import get_config from dedoc.utils.image_utils import draw_rectangle from dedoc.utils.utils import get_batch +from train_dataset.data_structures.task_item import TaskItem +from train_dataset.taskers.concrete_taskers.abstract_tasker import AbstractTasker class File: @@ -33,6 +33,9 @@ def data(self) -> dict: class TableTasker(AbstractTasker): + def __init__(self) -> None: + self.table_path = get_config().get("table_path", "/tmp/tables") + def create_tasks(self, task_size: int, tasks_uid: str) -> Iterable[str]: files = self._get_files() with tempfile.TemporaryDirectory() as tmp_dir: @@ -42,13 
+45,15 @@ def create_tasks(self, task_size: int, tasks_uid: str) -> Iterable[str]: image_directory = f"{task_directory}/images" with ZipFile(archive_path, "a") as task_archive: self.__add_task(archive=task_archive, files=batch, task_directory=task_directory) - dockerfile_directory = os.path.join(self.resources_path, "train_dataset/img_classifier_dockerfile") + dockerfile_directory = os.path.join(self.resources_path, "img_classifier_dockerfile") self._add_docker_files(archive=task_archive, task_directory=task_directory, dockerfile_directory=dockerfile_directory) self._add_config(task_archive=task_archive, task_name=task_directory, task_directory=task_directory, - config_path=os.path.join(self.resources_path, "train_dataset/tables/config.json"), + config_path=os.path.join(self.resources_path, "tables", "config.json"), tmp_dir=tmp_dir) + manifest_path = os.path.join(self.resources_path, "tables", "manifest.pdf") + task_archive.write(manifest_path, os.path.join(task_directory, os.path.basename(manifest_path))) self.__add_images(files=files, archive=task_archive, image_directory=image_directory) yield archive_path @@ -71,11 +76,11 @@ def get_original_documents(self) -> str: return self._add_files_to_archive(files=files, archive_path=archive_path) def _get_files(self) -> List[File]: - files = {file.split(".")[0] for file in os.listdir(table_path)} + files = {file.split(".")[0] for file in os.listdir(self.table_path)} result = [] for file_name in sorted(files): - image_path = os.path.join(table_path, f"{file_name}.png") - json_path = os.path.join(table_path, f"{file_name}.json") + image_path = os.path.join(self.table_path, f"{file_name}.png") + json_path = os.path.join(self.table_path, f"{file_name}.json") file = File(image_path=image_path, json_path=json_path) result.append(file) return result diff --git a/dedoc/train_dataset/taskers/concrete_taskers/__init__.py b/labeling/train_dataset/taskers/images_creators/__init__.py similarity index 100% rename from 
dedoc/train_dataset/taskers/concrete_taskers/__init__.py rename to labeling/train_dataset/taskers/images_creators/__init__.py diff --git a/dedoc/train_dataset/taskers/images_creators/__init__.py b/labeling/train_dataset/taskers/images_creators/concrete_creators/__init__.py similarity index 100% rename from dedoc/train_dataset/taskers/images_creators/__init__.py rename to labeling/train_dataset/taskers/images_creators/concrete_creators/__init__.py diff --git a/dedoc/train_dataset/taskers/images_creators/concrete_creators/abstract_images_creator.py b/labeling/train_dataset/taskers/images_creators/concrete_creators/abstract_images_creator.py similarity index 90% rename from dedoc/train_dataset/taskers/images_creators/concrete_creators/abstract_images_creator.py rename to labeling/train_dataset/taskers/images_creators/concrete_creators/abstract_images_creator.py index bde98d02..96384017 100644 --- a/dedoc/train_dataset/taskers/images_creators/concrete_creators/abstract_images_creator.py +++ b/labeling/train_dataset/taskers/images_creators/concrete_creators/abstract_images_creator.py @@ -2,7 +2,7 @@ from abc import ABC, abstractmethod from typing import List -from dedoc.train_dataset.data_structures.images_archive import ImagesArchive +from train_dataset.data_structures.images_archive import ImagesArchive class AbstractImagesCreator(ABC): diff --git a/dedoc/train_dataset/taskers/images_creators/concrete_creators/docx_images_creator.py b/labeling/train_dataset/taskers/images_creators/concrete_creators/docx_images_creator.py similarity index 97% rename from dedoc/train_dataset/taskers/images_creators/concrete_creators/docx_images_creator.py rename to labeling/train_dataset/taskers/images_creators/concrete_creators/docx_images_creator.py index f6ec7b54..09b1e5d8 100644 --- a/dedoc/train_dataset/taskers/images_creators/concrete_creators/docx_images_creator.py +++ b/labeling/train_dataset/taskers/images_creators/concrete_creators/docx_images_creator.py @@ -21,9 +21,9 @@ from 
dedoc.readers.docx_reader.data_structures.docx_document import DocxDocument from dedoc.readers.docx_reader.data_structures.paragraph import Paragraph from dedoc.readers.pdf_reader.pdf_image_reader.pdf_image_reader import PdfImageReader -from dedoc.train_dataset.taskers.images_creators.concrete_creators.abstract_images_creator import AbstractImagesCreator -from dedoc.train_dataset.train_dataset_utils import get_original_document_path from dedoc.utils.image_utils import get_concat_v +from dedoc.utils.train_dataset_utils import get_original_document_path +from train_dataset.taskers.images_creators.concrete_creators.abstract_images_creator import AbstractImagesCreator PairedPdf = namedtuple("PairedPdf", ["many_color_pdf", "two_color_pdf", "many_colors", "two_colors"]) @@ -45,7 +45,7 @@ def add_images(self, page: List[dict], archive: zipfile.ZipFile) -> None: """ The algorithm if as follows: 1 create two pdf: the first with bboxes around paragraph (each of different color) and the second with - bboxe of two colors. + bboxes of two colors. 2 We read images from both pdf (one page by one) and subtracting first image from the second (we get image with nonzero pixels on bboxes only) 3 we clear bboxes from first image @@ -73,7 +73,7 @@ def add_images(self, page: List[dict], archive: zipfile.ZipFile) -> None: def __create_pair_pdfs(self, docx_archive: zipfile.ZipFile, document: DocxDocument, tmp_dir: str) -> PairedPdf: """ - here we create two paired pdfs, we modify docx xml (drow bbox around paragraph) and create pdf, based on this + here we create two paired pdfs, we modify docx xml (draw bbox around paragraph) and create pdf, based on this modified docx. 
We create pdf with multi colors and with two colors @param docx_archive: opened docx document (docx is a zip archive) @param document: parsed document diff --git a/dedoc/train_dataset/taskers/images_creators/concrete_creators/scanned_images_creator.py b/labeling/train_dataset/taskers/images_creators/concrete_creators/scanned_images_creator.py similarity index 59% rename from dedoc/train_dataset/taskers/images_creators/concrete_creators/scanned_images_creator.py rename to labeling/train_dataset/taskers/images_creators/concrete_creators/scanned_images_creator.py index 60acbfe2..89fd5d2c 100644 --- a/dedoc/train_dataset/taskers/images_creators/concrete_creators/scanned_images_creator.py +++ b/labeling/train_dataset/taskers/images_creators/concrete_creators/scanned_images_creator.py @@ -1,3 +1,4 @@ +import json import os import tempfile import zipfile @@ -11,10 +12,9 @@ from PIL.Image import Image from pdf2image import convert_from_path -from dedoc.train_dataset.data_structures.images_archive import ImagesArchive -from dedoc.train_dataset.taskers.images_creators.concrete_creators.abstract_images_creator import AbstractImagesCreator -from dedoc.train_dataset.train_dataset_utils import get_original_document_path from dedoc.utils.image_utils import draw_rectangle +from dedoc.utils.train_dataset_utils import get_original_document_path +from train_dataset.taskers.images_creators.concrete_creators.abstract_images_creator import AbstractImagesCreator class ScannedImagesCreator(AbstractImagesCreator): @@ -22,15 +22,19 @@ class ScannedImagesCreator(AbstractImagesCreator): def __init__(self, path2docs: str) -> None: self.path2docs = path2docs - @staticmethod - def _draw_one_bbox(image: np.ndarray, line: dict) -> np.ndarray: - bbox = line["bbox"]["bbox"] - image_bbox = draw_rectangle(image=image, - x_top_left=bbox["x_top_left"], - y_top_left=bbox["y_top_left"], - width=bbox["width"], - height=bbox["height"], - color=line.get("color", "black")) + def __draw_one_bbox(self, image: 
PIL.Image, line: dict, pdf_image: bool = False) -> np.ndarray: + bbox_annotation = self.__get_bbox_annotations(line)[0] + bbox = json.loads(bbox_annotation["value"]) + if pdf_image: + image = image.resize(size=(bbox["page_width"], bbox["page_height"]), resample=PIL.Image.BICUBIC) + image_bbox = draw_rectangle( + image=image, + x_top_left=int(bbox["x_top_left"] * bbox["page_width"]), + y_top_left=int(bbox["y_top_left"] * bbox["page_height"]), + width=bbox["width"] * bbox["page_width"], + height=bbox["height"] * bbox["page_height"], + color=line.get("color", (0, 0, 0)) + ) image_bbox = cv2.resize(np.array(image_bbox), (1276, 1754)) return image_bbox @@ -42,11 +46,9 @@ def add_images(self, page: List[dict], archive: zipfile.ZipFile) -> None: @return: """ path = get_original_document_path(self.path2docs, page) - page = [line for line in page if "bbox" in line] + page = [line for line in page if self.__get_bbox_annotations(line)] if path.endswith("pdf"): images = self._create_image_pdf(path=path, page=page) - elif path.endswith("zip"): - images = self._create_image_zip(path=path, page=page) else: images = self._create_image_jpg(path=path, page=page) for image, line in zip_longest(images, page): @@ -63,22 +65,7 @@ def can_read(self, page: List[dict]) -> bool: @return: """ image_name = get_original_document_path(self.path2docs, page) - return image_name.endswith(("png", "jpg", "jpeg", "pdf", "zip")) - - def _create_image_zip(self, path: str, page: List[dict]) -> Iterator[Image]: - current_image = None - current_page = None - archive = ImagesArchive(path) - for line in page: - page_id = line["_metadata"]["page_id"] - if current_image is None or current_page != page_id: - current_page = page_id - current_image = archive.get_page(page_id) - image = deepcopy(current_image) - image_bbox = self._draw_one_bbox(image, line) - image_bbox = PIL.Image.fromarray(image_bbox) - image_bbox = image_bbox.convert("RGB") - yield image_bbox + return image_name.endswith(("png", "jpg", "jpeg", 
"pdf")) def _create_image_pdf(self, path: str, page: List[dict]) -> Iterator[Image]: current_image = None @@ -89,7 +76,7 @@ def _create_image_pdf(self, path: str, page: List[dict]) -> Iterator[Image]: current_page = page_id current_image = convert_from_path(path, first_page=current_page, last_page=current_page + 1)[0] image = deepcopy(current_image) - image_bbox = self._draw_one_bbox(image, line) + image_bbox = self.__draw_one_bbox(image, line, pdf_image=True) image_bbox = PIL.Image.fromarray(image_bbox) image_bbox = image_bbox.convert("RGB") yield image_bbox @@ -97,7 +84,11 @@ def _create_image_pdf(self, path: str, page: List[dict]) -> Iterator[Image]: def _create_image_jpg(self, path: str, page: List[dict]) -> Iterator[Image]: image = PIL.Image.open(path) for line in page: - image_bbox = self._draw_one_bbox(image, line) + image_bbox = self.__draw_one_bbox(image, line) image_bbox = PIL.Image.fromarray(image_bbox) image_bbox = image_bbox.convert("RGB") yield image_bbox + + def __get_bbox_annotations(self, line: dict) -> List[dict]: + bbox_annotations = [annotation for annotation in line["_annotations"] if annotation["name"] == "bounding box"] + return bbox_annotations diff --git a/dedoc/train_dataset/taskers/images_creators/concrete_creators/txt_images_creator.py b/labeling/train_dataset/taskers/images_creators/concrete_creators/txt_images_creator.py similarity index 95% rename from dedoc/train_dataset/taskers/images_creators/concrete_creators/txt_images_creator.py rename to labeling/train_dataset/taskers/images_creators/concrete_creators/txt_images_creator.py index d8e9c082..e804f15f 100644 --- a/dedoc/train_dataset/taskers/images_creators/concrete_creators/txt_images_creator.py +++ b/labeling/train_dataset/taskers/images_creators/concrete_creators/txt_images_creator.py @@ -8,9 +8,9 @@ from dedoc.data_structures.line_with_meta import LineWithMeta from dedoc.readers.txt_reader.raw_text_reader import RawTextReader -from 
dedoc.train_dataset.taskers.images_creators.concrete_creators.abstract_images_creator import AbstractImagesCreator -from dedoc.train_dataset.train_dataset_utils import get_original_document_path +from dedoc.utils.train_dataset_utils import get_original_document_path from dedoc.utils.utils import get_batch +from train_dataset.taskers.images_creators.concrete_creators.abstract_images_creator import AbstractImagesCreator class TxtImagesCreator(AbstractImagesCreator): @@ -31,7 +31,7 @@ def __init__(self, path2docs: str, *, config: dict) -> None: self.background_color = (255, 255, 255) self.border_color = (0, 0, 0) - font_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..", "..", "resources", "Arial_Narrow.ttf")) + font_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..", "resources", "Arial_Narrow.ttf")) self.font = ImageFont.truetype(font_path, self.font_size) self.txt_reader = RawTextReader(config=config) diff --git a/dedoc/train_dataset/taskers/images_creators/image_creator_composition.py b/labeling/train_dataset/taskers/images_creators/image_creator_composition.py similarity index 74% rename from dedoc/train_dataset/taskers/images_creators/image_creator_composition.py rename to labeling/train_dataset/taskers/images_creators/image_creator_composition.py index 30e7a2bd..6e00eb94 100644 --- a/dedoc/train_dataset/taskers/images_creators/image_creator_composition.py +++ b/labeling/train_dataset/taskers/images_creators/image_creator_composition.py @@ -2,10 +2,10 @@ import zipfile from typing import List -from dedoc.train_dataset.data_structures.images_archive import ImagesArchive -from dedoc.train_dataset.exceptions.empty_page_error import EmptyPageError -from dedoc.train_dataset.exceptions.task_creation_error import TaskCreationError -from dedoc.train_dataset.taskers.images_creators.concrete_creators.abstract_images_creator import AbstractImagesCreator +from train_dataset.data_structures.images_archive 
import ImagesArchive +from train_dataset.exceptions.empty_page_error import EmptyPageError +from train_dataset.exceptions.task_creation_error import TaskCreationError +from train_dataset.taskers.images_creators.concrete_creators.abstract_images_creator import AbstractImagesCreator class ImageCreatorComposition: diff --git a/dedoc/train_dataset/taskers/tasker.py b/labeling/train_dataset/taskers/tasker.py similarity index 91% rename from dedoc/train_dataset/taskers/tasker.py rename to labeling/train_dataset/taskers/tasker.py index df8bb931..587f0e22 100644 --- a/dedoc/train_dataset/taskers/tasker.py +++ b/labeling/train_dataset/taskers/tasker.py @@ -6,14 +6,13 @@ from typing import Dict, Optional, Tuple from zipfile import ZipFile -from dedoc.train_dataset.exceptions.unknown_task_error import UnknownTaskError -from dedoc.train_dataset.taskers.concrete_taskers.abstract_tasker import AbstractTasker +from train_dataset.exceptions.unknown_task_error import UnknownTaskError +from train_dataset.taskers.concrete_taskers.abstract_tasker import AbstractTasker class Tasker(object): def __init__(self, - boxes_label_path: str, line_info_path: str, images_path: str, save_path: str, @@ -23,7 +22,6 @@ def __init__(self, *, config: dict) -> None: """ - :param boxes_label_path: abs path to boxes.jsonlines file :param line_info_path: abs path to lines.jsonlines :param images_path: abs path to folder of images :param save_path: abs path to save of result archive of tasks @@ -32,7 +30,6 @@ def __init__(self, """ self.config = config if config is not None else {} - self.boxes_label_path = boxes_label_path self.line_info_path = line_info_path self.images_path = images_path os.makedirs(save_path, exist_ok=True) @@ -41,7 +38,7 @@ def __init__(self, self.tmd_dir = tmd_dir self.concrete_taskers = concrete_taskers self.progress_bar = {} if progress_bar is None else progress_bar - resources_path = os.path.join(os.path.dirname(__file__), "..", "..", "..", "resources", "train_dataset") + 
resources_path = os.path.join(os.path.dirname(__file__), "..", "..", "resources") self.resources = os.path.abspath(resources_path) def create_tasks(self, type_of_task: str, count_tasks: int = None, task_size: int = None, task_uid: Optional[str] = None) -> Tuple[str, int]: diff --git a/pyproject.toml b/pyproject.toml index ca2b0344..f0f8754c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,6 @@ dependencies = {file = ["requirements.txt"]} [tool.setuptools.packages.find] where = ["."] include = ["dedoc*"] -exclude = ["dedoc.scripts*"] [project.optional-dependencies] torch = ["torch~=1.11.0", "torchvision~=0.12.0"] @@ -42,6 +41,8 @@ docs = [ "docutils==0.18.1", "Sphinx==6.2.1", "sphinx_rtd_theme==1.2.0", # for using sphinx_rtd_theme + "sphinx_copybutton==0.5.2", # for using copy button in code examples + "sphinx-togglebutton==0.3.2", # for using toggle button "linuxdoc==20230506", # for using flat-table "tabula-py==2.8.1", # for adding new doc type tutorial ] diff --git a/requirements.txt b/requirements.txt index 3af48409..3b967a2c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ beautifulsoup4>=4.10.0,<=4.12.2 charset-normalizer>=2.0.12,<=3.2.0 Cython>=0.29.28,<=3.0.2 docx==0.2.4 -dedoc-utils==0.3.5 +dedoc-utils==0.3.6 fastapi>=0.77.0,<=0.103.0 huggingface-hub>=0.14.1,<=0.16.4 imutils==0.5.4 @@ -27,7 +27,7 @@ rarfile==4.0 requests>=2.22.0 roman>=3.3,<4.0 scikit-image>=0.19.3,<=0.21.0 -scikit_learn>=1.0.2,<=1.3.0 +scikit_learn>=1.0.2,<=1.2.2 scipy>=1.8.0,<=1.11.2 six==1.14.0 starlette>=0.26.1,<=0.27.0 diff --git a/resources/benchmarks/table_benchmark.json b/resources/benchmarks/table_benchmark.json new file mode 100644 index 00000000..d7a9d7c6 --- /dev/null +++ b/resources/benchmarks/table_benchmark.json @@ -0,0 +1,16 @@ +{ + "mode_metric_structure_only": false, + "mean": 0.9468374367023571, + "images": { + "example_with_table0_0.png": 0.9525583036909738, + "example_with_table0_1.png": 0.9264351862896008, + "example_with_table6.png": 
0.989010989010989, + "example_with_table4.jpg": 0.908436211832951, + "example_with_table17.jpg": 0.8078952936402488, + "example_with_table_hor_vert_union.png": 0.9896091617933723, + "example_with_table1.png": 0.9781560283687943, + "example_with_table_horizontal_union.jpg": 0.9925757575757576, + "example_with_table3.png": 0.9778008866078716, + "example_with_table5.png": 0.9458965482130129 + } +} \ No newline at end of file diff --git a/resources/benchmarks/table_benchmark_on_generated_data.json b/resources/benchmarks/table_benchmark_on_generated_data.json new file mode 100644 index 00000000..130bcd28 --- /dev/null +++ b/resources/benchmarks/table_benchmark_on_generated_data.json @@ -0,0 +1,506 @@ +{ + "mode_metric_structure_only": true, + "mean": 0.9467889492889642, + "images": { + "0OEG7D5CXUSXDNEXAZ8A3.png": 0.993103448275862, + "0IS8OPRTM71QYN821WA5S.png": 0.9878048780487805, + "0KX1D4AGMTM3EWR0EF0A5.png": 0.989010989010989, + "0QBK1U71YOHBG5Z23MT7E.png": 0.9916666666666667, + "0DC57AS1OYZ1BRHZHPIO2.png": 0.96, + "0GJE73OG32H2P2SL2AI2J.png": 0.9905660377358491, + "0GYAQKWTI3LN6DNZFM2TZ.png": 0.9904761904761905, + "0GHKLO6LOH5LBTYEUND3S.png": 0.9917355371900827, + "0F831FOUA10K3594FG4IM.png": 0.9896907216494846, + "0XG0I2F0MMZ3QMXWLWFMX.png": 0.9935064935064936, + "0WMTO9U10ILEB9HCX4C0B.png": 0.9863013698630136, + "0C1ZYGFL2YNFM2W3P2KN1.png": 0.9795918367346939, + "0A4G5JAZSJS4BT5LBZ2Q3.png": 0.9850746268656716, + "0I75SMSDR5JSJXF07PN6J.png": 0.9915966386554622, + "0K9EAAIYXSUT80SYF3ML4.png": 0.9836065573770492, + "0PH78O2B9CJAM6MMINZXT.png": 0.9876543209876543, + "1BRZ4ALOZMMEXGR4AVJWG.png": 0.9876543209876543, + "0AFVW6AL3EH9H76ONNDYF.png": 0.9848484848484849, + "0EVCQHN9C65AUYG1UAN3C.png": 0.9696969696969697, + "0DD9D0ILAPJIH77GEVRGP.png": 0.4098360655737705, + "1BHU2JO8ODKS3OL4RIU6A.png": 0.9905660377358491, + "0OG2AZLHJPMBX43O2O9LR.png": 0.989010989010989, + "0M64SMZT9HTN6LXQ4M24T.png": 0.9846153846153847, + "0TY12X0C3U2BPZC81PW66.png": 0.9836065573770492, + 
"0W109P7LI6B5HIYM3SJ5A.png": 0.9873417721518988, + "0H6272E6S2YUDJWBSWKQN.png": 0.9873417721518988, + "0ZRX97WSSVCVQ3NJ5959P.png": 0.9873417721518988, + "0ZFX4HDI3O7YQFDYRRYKI.png": 0.9722222222222222, + "1A4SDGAXB66WDBW7OUH58.png": 0.9876543209876543, + "0HQVUJMOQRQQ5FIP4PMZF.png": 0.9859154929577465, + "0Q335MQBC8UJJMASJUNWZ.png": 0.9938271604938271, + "0AF02R419WL1YN97ZV144.png": 0.9868421052631579, + "0JWOGY4C0KQ14J958GLYD.png": 0.9922480620155039, + "0JCVUE03Y5YD8A45IOIA5.png": 0.972972972972973, + "0R10PSLELMJ0SPFCXX92A.png": 0.9814814814814815, + "0BF411IVR1HLU1Q44I3K7.png": 0.990990990990991, + "0K9C1HJZ8K3L6CRAQ6VCW.png": 0.9850746268656716, + "0Q2MRICBMAFRV1GRRR5TA.png": 0.5189873417721519, + "0ZK44UG99IWIPKRSCOSJV.png": 0.9795918367346939, + "0S09D3ZPVQ8YOT55XIOE1.png": 0.9887640449438202, + "0KRLZUD3DQAU1DYDU99ZH.png": 0.9882352941176471, + "1ATIOLLN3DOAHKX75560Q.png": 0.5460992907801419, + "0KSBBUINDNN16F2ZLQHV4.png": 0.9915254237288136, + "0P5IE8XH9BN2EGC0DX27Z.png": 0.989010989010989, + "0GJ88Q9SMUOWF3WILKG14.png": 0.98, + "0QYA242XOQ0Y9078UC7NI.png": 0.984375, + "0F4Z8B4S5RV008LHJBW8S.png": 0.9896907216494846, + "0GWJH40B21AJBR1F73FXI.png": 0.9722222222222222, + "0ZO44O69QHTV62QJ3X9KH.png": 0.9883720930232558, + "0VK3KLUJVLAB9SRQDN6EJ.png": 0.45622119815668205, + "0HQHS3BO0IIOJ5L2EP2H4.png": 0.9882352941176471, + "1DBQ2M6XQ66Y2895PYNOM.png": 0.9824561403508771, + "0A4OYW3ZL5QP76IGF0DK0.png": 0.9836065573770492, + "1AFVW6AL3EH9H76ONNDYF.png": 0.9863013698630136, + "0J0JQM9WD7B0RCNKWBC5S.png": 0.972972972972973, + "0OVSLM3WAA36TZQCOL1WS.png": 0.9821428571428571, + "0GMEN2MGE7HN3ROOZQ5YD.png": 0.9879518072289156, + "0AJLKOKRHEVOTGE90GEH6.png": 0.9922480620155039, + "0DUX4YKT5JYJO3Z573OG8.png": 0.967741935483871, + "0A4SDGAXB66WDBW7OUH58.png": 0.9871794871794872, + "0G5S5CXGRLABEYII4QG2Z.png": 0.9887640449438202, + "0YM92E2EEDDGHAUW2YZ8Q.png": 0.9863013698630136, + "0F0E32N4VR4Q9960I0DB8.png": 0.975609756097561, + "0JNQPLSGKLPQ0UAAFYL5T.png": 
0.967741935483871, + "0YL4VFF3LUUQITLVU3U9V.png": 0.9902912621359223, + "0ZO2Z3XCHZLB43ARH68WS.png": 0.9803921568627451, + "0SHL8BKLII1AGBZ1SEB4U.png": 0.9876543209876543, + "0KBLEG9N1SBX956ZCIP5I.png": 0.9795918367346939, + "1B1QX4K8U8P9QA3HVLRPN.png": 0.9896907216494846, + "0D0ZG3O9YHMQAPHCD0890.png": 0.9924242424242424, + "0PTMPFGYNVJWO6FCX1QRZ.png": 0.9933333333333333, + "0A7ZA5BA5TPHBN2WP6TT9.png": 0.9904761904761905, + "0DE9UIIVMYH3UK0SYFVUG.png": 0.9767441860465116, + "0U0LFAJATVD9YEC1Z3497.png": 0.9767441860465116, + "1A3YX0911ULBZSCUBNDZS.png": 0.9929577464788732, + "0WRQRWHH2CMV2L4CE3SN9.png": 0.9878048780487805, + "0C1X00FENSOUN2Y08Y3JT.png": 0.9824561403508771, + "0ONRU7A4SU4WAUWF25FRP.png": 0.9878048780487805, + "1DD9D0ILAPJIH77GEVRGP.png": 0.9824561403508771, + "0CBFM7HG55Z7O8F4Y0O0L.png": 0.9905660377358491, + "0C9EM94JJTICVGS6U2T2U.png": 0.9917355371900827, + "0KK57808VO3HNS1AW4CJO.png": 0.9767441860465116, + "0E7XFLPH56MT23HNK3MZ6.png": 0.9886363636363636, + "0J818KH6HIIA83D74FXS3.png": 0.9887640449438202, + "0TLMD42BW0F4NSD9PG19X.png": 0.984375, + "0PSN5QFZWTPA9U05O7MZ3.png": 0.99, + "0GI0JNFJAOXK5OJKRXCND.png": 0.9891304347826086, + "0Z4X1LVZ1K4NE2RR8P7EA.png": 0.9859154929577465, + "0S709DW5AZF9VPCPMVHXB.png": 0.9891304347826086, + "1B87OEX5XX0BHUOQAS50A.png": 0.9767441860465116, + "1AAERNSDA06GDA7OFZVCA.png": 0.9850746268656716, + "1CORAY089OILX2OWIKU1E.png": 0.98, + "0WO86MK2DC2EZUZLSMFA1.png": 0.9911504424778761, + "0KNUPYHEXZYSW1TNZ6I7L.png": 0.9846153846153847, + "0ONF59OAQYX89LAM941E6.png": 0.9859154929577465, + "0E1IVAEMQXKVCH3Q0JCVX.png": 0.9782608695652174, + "0K8YJZK75V8SXL0GIM4SU.png": 0.9896907216494846, + "0LUL2CVQ1HLC1KL6D2VMP.png": 0.4065934065934066, + "0WIEGQEF4G9LN2UM49Y12.png": 0.9876543209876543, + "0AQ9EL10BYBSGJO2RLC6Q.png": 0.9888888888888889, + "0M0WYXRJONRUQ3ZG24MJJ.png": 0.989010989010989, + "0E3XQJO1C4CKR9TNFB4IC.png": 0.9871794871794872, + "0H17CYXGJTHXPQUP51TBI.png": 0.9911504424778761, + "0NK736IIIHGBF52E1UKQ4.png": 
0.9859154929577465, + "0DYIHMLOKOR6HNF2XAI8F.png": 0.9836065573770492, + "0BZ5GZPTUSCNBNGBNQZEG.png": 0.9859154929577465, + "0M47PMX0DRIVKCJBYKHPJ.png": 0.9767441860465116, + "00MK8C41M7MW013CJ9SPU.png": 0.9922480620155039, + "0DMXCT01TPF8O33UMENE4.png": 0.9917355371900827, + "0WS9VI6T1X0M5H6D8O67Z.png": 0.9859154929577465, + "0XQ9XQOL15RDKQT4YZUQC.png": 0.5739130434782609, + "1DD8FWYLADAY5EJ3UZUD9.png": 0.9876543209876543, + "0MXPSYD5A5U86BSSZQMJN.png": 0.975609756097561, + "0QAOLXSIIRIRQ3W1OP7Y8.png": 0.9921259842519685, + "1BZ5GZPTUSCNBNGBNQZEG.png": 0.9767441860465116, + "0Y5AIJNHB8DTPQOC92X6P.png": 0.9882352941176471, + "0IL3BP1QRAZ54V54IBK9A.png": 0.9876543209876543, + "0MDCUYD9ASW4AGWD3ZYK5.png": 0.9891304347826086, + "0MON88TOR16AGTBLDTGJC.png": 0.9904761904761905, + "0QVCHWR0EZCMQ5J5P0Z1J.png": 0.9767441860465116, + "0IPJ09DW34Q275Z5CMS1X.png": 0.99, + "1A7ZU26KX6C0LG0D3T3ZS.png": 0.9863013698630136, + "0M49YEV7H4P48EONCBFPS.png": 0.9863013698630136, + "1D34PI1NNCV0AB4WCQMB3.png": 0.9863013698630136, + "0AGYYXV88WJW2FC6FVV3Q.png": 0.9863013698630136, + "0F9W69ODT3GQCQ6F11L2E.png": 0.9767441860465116, + "0Q3RJT1DJMPO9D9BE6JNO.png": 0.9868421052631579, + "0ETQJY2HRGYIBO46BSD3P.png": 0.4503105590062112, + "0BRZ4ALOZMMEXGR4AVJWG.png": 0.43983402489626555, + "0WLG2ZXPFXZGF9RM2Z6N6.png": 0.9871794871794872, + "1BP5KU2XHXZ0C431B4OL9.png": 0.43450479233226835, + "00ZG4J0UMAHQMR57DQ5T7.png": 0.9818181818181818, + "0S5HD36LFVDWLLH6UFK9I.png": 0.9939759036144579, + "0EW4PZW85MH9BS8VI83KZ.png": 0.9848484848484849, + "0EMFKQLMGGAFPLQGUEZSJ.png": 0.96, + "0H4TWDI39J0HRG239GQ10.png": 0.9938650306748467, + "0BSXNNN0LA94101P5D38I.png": 0.9882352941176471, + "0SLVZSD9X7VZPGQU0Q2QN.png": 0.9850746268656716, + "0K6WPSDJC0ICOWFEASYB4.png": 0.9911504424778761, + "0TY3MTJ6YZDE6QI73SH5A.png": 0.9859154929577465, + "0B87OEX5XX0BHUOQAS50A.png": 0.9896907216494846, + "1C1X00FENSOUN2Y08Y3JT.png": 0.967741935483871, + "0KEM29NIZZ7UI3CTN6NEA.png": 0.9896907216494846, + 
"0JZQMX95783K8QW3ERXSM.png": 0.8827586206896552, + "0R47TY8TMFAL346RUY0LW.png": 0.9696969696969697, + "0EG83QLMPW7MGGMGBYGPD.png": 0.9882352941176471, + "1AGYYXV88WJW2FC6FVV3Q.png": 0.9927536231884058, + "0OF74SYX6Q102JCQ5KELF.png": 0.9896907216494846, + "1BX1I2HS6BLV92NZHV6J1.png": 0.9940828402366864, + "1DDEMI2034QD7F4QRH1IV.png": 0.972972972972973, + "0Z8LGXZ1SMLBHV5T6Y4O9.png": 0.9859154929577465, + "0NGE5XRBD2YHBZFMDL7VD.png": 0.9795918367346939, + "0SX4TWDHV25DCZV3HQEHH.png": 0.9777777777777777, + "00Q04QLVCESVWCSMDAURN.png": 0.9855072463768116, + "0SK696SAQW3MZNDMD4W85.png": 0.984375, + "0F4WBFLG32FAT22W0NGEY.png": 0.45871559633027525, + "0TNFF3RUQ2UL3PRNYF45M.png": 0.9868421052631579, + "1A4OYW3ZL5QP76IGF0DK0.png": 0.9904761904761905, + "0IUNRRJ3JHMEAORR2EXRS.png": 0.9908256880733946, + "0L764EQB3ZGC3FYQ20PR9.png": 0.9863013698630136, + "0XZJ4SZWY0ZOD9QBZP96A.png": 0.9922480620155039, + "0PU3J7NYVCB6XLSJJOEZ9.png": 0.9911504424778761, + "0DDKIN1PFJQTFW1JADVHT.png": 0.9863013698630136, + "1A4G5JAZSJS4BT5LBZ2Q3.png": 0.9836065573770492, + "0DPMX3BRIG9CWZPYKXFWS.png": 0.9921259842519685, + "0N7P792721CFI8EDOCB0N.png": 0.9908256880733946, + "1BIC4PMO7M3ZB8WUC3STJ.png": 0.9933333333333333, + "0XNBY82W4NFSD9GV6ONKU.png": 0.9911504424778761, + "0F3P8XGEMBYESYCYAOQPN.png": 0.9923076923076923, + "00D983SP0WHF6YGMKSHCR.png": 0.9803921568627451, + "0N91H0ZWMHBPPPPON4HUW.png": 0.993006993006993, + "0AWZPWR198XN7U8HY1E32.png": 0.9836065573770492, + "0S0Z9J05KZWNPKUFRD78Z.png": 0.9927007299270073, + "0FCDXM7JS1QEBBY3DCGBM.png": 0.9795918367346939, + "0OEVVJNLZKKW7GOPM188W.png": 0.9615384615384616, + "0Z7FUMCO707ZDI55EG306.png": 0.9878048780487805, + "0DE4P4M2855D754NA8993.png": 0.9722222222222222, + "0UZ81HSUQSHVVGU56NIOG.png": 0.9902912621359223, + "0AAERNSDA06GDA7OFZVCA.png": 0.992, + "0WZXI1YECN77S9GD6GQ4M.png": 0.98989898989899, + "0S8HOU13AW544ALTKAB73.png": 0.989010989010989, + "0AAPDAAK73MRINE7PM0ZJ.png": 0.41628959276018096, + "0UJ2AFVE6RWGTYSB6DKLJ.png": 
0.45871559633027525, + "0ISYQEE43TA3O41XMA47A.png": 0.993103448275862, + "0L2E8S3ICCMGPE9PS3RLV.png": 0.9908256880733946, + "0BIC4PMO7M3ZB8WUC3STJ.png": 0.984375, + "0CTFYQFHQ1S1FLIEAPZTB.png": 0.9767441860465116, + "0A9RJA2I3YJT58JR2MEOT.png": 0.9818181818181818, + "0UGHOJ96BTPB57BR0DJS7.png": 0.4505494505494505, + "0TW35WW1PRLL2YKVYWYRM.png": 0.9818181818181818, + "0HTO45RT9NH5KQUCLOV2H.png": 0.9722222222222222, + "0F0TA5W8GO31TXUFMHHTO.png": 0.98989898989899, + "0HWMSCT6L3MCGFJV4OXF8.png": 0.975609756097561, + "0KCIUQNXNE3ZMX5ECY7V3.png": 0.9925925925925926, + "0ET4I24PZATQRKGMGG5KC.png": 0.975609756097561, + "0I6WVEL7V26O3KJJ1GGYF.png": 0.9896907216494846, + "1A2AT7TW5KOMUUAK7TQXT.png": 0.9767441860465116, + "0W8NNJL30MNEY6RTPD6DA.png": 0.9767441860465116, + "0XC0XOHP855H9DFG41W9T.png": 0.9803921568627451, + "1DC57AS1OYZ1BRHZHPIO2.png": 0.9903846153846154, + "00WVVGSQ00B0IZU4OKPHQ.png": 0.9916666666666667, + "0RTI5C20W407SL59RANEM.png": 0.991304347826087, + "0A0DA327P9Y532UTLHE2N.png": 0.9722222222222222, + "0AQZMEU4Q38NKK4USHAC5.png": 0.9896907216494846, + "0U7602J86XPC7AVTSPMWL.png": 0.9878048780487805, + "0DSGAEKSK52RUNGEOGEXP.png": 0.9921259842519685, + "0JD5R5NDJKRRHT1UI6GFW.png": 0.7058823529411764, + "0PQ9OK98A29AC6GEI3DKQ.png": 0.4882352941176471, + "0TA7SVAQC7PKDE8BUP3NF.png": 0.9887640449438202, + "0TQM47CA0F30LG2C0S2KN.png": 0.9887640449438202, + "0HA1FE8828DJ86ZIJUIX4.png": 0.967741935483871, + "0P1Y0C88Y17DSXE616MQN.png": 0.98989898989899, + "0I6GDDWCTMF9V4YLGLBIM.png": 0.4036697247706422, + "0SI6DA6CAXUMFYSXBXIF6.png": 0.9906542056074766, + "0NM9CUQJV6W2N9434O81D.png": 0.9859154929577465, + "0WU8XJP1VJSLZXQ7S43HM.png": 0.9767441860465116, + "0P0WR7JJ9JBXO0HVMDETS.png": 0.975609756097561, + "0ZNTZMWW1X0QZV4AGDHYL.png": 0.9926470588235294, + "0C98HOE9TQ4HZK6DKGF5I.png": 0.989010989010989, + "0JCDZWWAMUR9FRGHL9IVN.png": 0.9911504424778761, + "0PVN50SJP1LUTHE2TID60.png": 0.9926470588235294, + "0D7CMRTBBENLYDO7EWWVZ.png": 0.98, + 
"0JOTZX26K6UJB6LNVK9RH.png": 0.975609756097561, + "0ZFOZ6UKG7DCCD5HSUIIX.png": 0.9876543209876543, + "0L7V0ZXS2M9JMSBD05I25.png": 0.9873417721518988, + "0G1E97R3QFH7FG9AUAIFB.png": 0.9863013698630136, + "0CORAY089OILX2OWIKU1E.png": 0.984375, + "0EH9JARAL7RYD3CVMM8AZ.png": 0.5185185185185186, + "00KDBG5H22KPNCPCK7L2P.png": 0.9848484848484849, + "00XJ5C1RWIRVID9IPUX8G.png": 0.8, + "0FFJM5ABUDDCT2DOCW2T4.png": 0.9916666666666667, + "0D34PI1NNCV0AB4WCQMB3.png": 0.9896907216494846, + "0X9D7AJTD7S91BNHMQ4L0.png": 0.9876543209876543, + "0W9SN5GJDEWTG3WAPGPDZ.png": 0.9887640449438202, + "0ATIOLLN3DOAHKX75560Q.png": 0.9882352941176471, + "1C9EM94JJTICVGS6U2T2U.png": 0.9883720930232558, + "0TG6BRHGF3C865C2OL6DE.png": 0.9882352941176471, + "1BUP8L4PGVBNQE1GSCGJZ.png": 0.9863013698630136, + "1AJLKOKRHEVOTGE90GEH6.png": 0.989010989010989, + "1C98HOE9TQ4HZK6DKGF5I.png": 0.9859154929577465, + "0IH65GI6IN6RQWJE04YPG.png": 0.9859154929577465, + "0DNHG32KRYJ9PQ7UU1YL5.png": 0.9863013698630136, + "0EV54WP1Y9JDCWMDIT0OM.png": 0.975609756097561, + "0BE3I0HX6XWZQA4EFY99C.png": 0.984375, + "0O7G4HGEK48J2NUB5RCES.png": 0.9882352941176471, + "1BXWVCNXW1Z4N1XG8QOG4.png": 0.9905660377358491, + "0M2V36SUMHY2U8FRS9NYZ.png": 0.4424778761061947, + "0STJA7OMA59TOQ8XQ54G5.png": 0.98, + "0VB0OIQZQXKY5PA111Q8B.png": 0.984375, + "0RBPX6DU1W6LIYA2VRAA4.png": 0.972972972972973, + "0SP3KJJ2HMQZF088NH2DR.png": 0.9904761904761905, + "1D0ZG3O9YHMQAPHCD0890.png": 0.9655172413793104, + "0XZ590ZLZXRB09XIADL9V.png": 0.9934640522875817, + "0QU6QW0KAWVXZ6TL7FVJE.png": 0.9933774834437086, + "0PKH21420YW57OPRJR21R.png": 0.9922480620155039, + "0TX7Y5KWQ2MVU3579QIYH.png": 0.9777777777777777, + "0Y6OW4PMMWG05F4ZFYQ40.png": 0.9767441860465116, + "0EK5DRITVR9G3KDVF1CTJ.png": 0.9876543209876543, + "0DDEMI2034QD7F4QRH1IV.png": 0.9933774834437086, + "0HJXUBEZQCR1DEUQ8V30I.png": 0.9932885906040269, + "0BG5K95UCWQ3JXWC501XA.png": 0.9886363636363636, + "00TNQG8N9T3KUVMZ7AWTB.png": 0.967741935483871, + 
"0TJSB9YOUAG7C9OZW3U80.png": 0.9848484848484849, + "0SYEGYPSNLKCALCQBPGK2.png": 0.9929577464788732, + "0IP23CAYMTIVE93KLVMRA.png": 0.9824561403508771, + "0KFRN6DX1A6MMGS24B39T.png": 0.9850746268656716, + "1CTFYQFHQ1S1FLIEAPZTB.png": 0.9803921568627451, + "0U9U2Q7VBD1V6HBT7FQKM.png": 0.9923076923076923, + "0S7MUFP120D8OP4ZCCCUV.png": 0.4873417721518988, + "0BXWVCNXW1Z4N1XG8QOG4.png": 0.9873417721518988, + "1A7ZA5BA5TPHBN2WP6TT9.png": 0.9824561403508771, + "1ACY14LU0VWSKDOHEAVZM.png": 0.9924812030075187, + "0MPO1XXHHM8I5BOIT3DB9.png": 0.9876543209876543, + "0RSQ19UNM98CNWII5Q25F.png": 0.975609756097561, + "0EAA9XEBN9W7XDBPK31UZ.png": 0.9803921568627451, + "0U0BR4A64P7CE7YZ57HQ1.png": 0.9911504424778761, + "0XFNT3NMKFW1DB0F2LVY3.png": 0.9916666666666667, + "1AQZMEU4Q38NKK4USHAC5.png": 0.9904761904761905, + "0VGZMTO2VCZVZKGAOHZEU.png": 0.9910714285714286, + "0DBQ2M6XQ66Y2895PYNOM.png": 0.984375, + "0BP5KU2XHXZ0C431B4OL9.png": 0.9811320754716981, + "0PYCGJHF1705P4NTCM8AS.png": 0.9824561403508771, + "0RAGYZ9465I7GLXZXCLCQ.png": 0.9924812030075187, + "1A9560NY0NQ5OVZQQBJRQ.png": 0.4636363636363636, + "0KXDSHWWWYQJBXT2Y6U8S.png": 0.9803921568627451, + "1BF411IVR1HLU1Q44I3K7.png": 0.984375, + "0T1ZL9NSVN3385DR7B86C.png": 0.9824561403508771, + "0SYKTWM1EF4KS646AWQEL.png": 0.9803921568627451, + "0S104IFNSN5EJ31212IOP.png": 0.989010989010989, + "0H2RZUXKBQEVFJ2JT29R4.png": 0.9818181818181818, + "0SVC8WRHPF38HHKBN65YD.png": 0.9926470588235294, + "0HVIW7DPWCJSWJ5PCJDM2.png": 0.9855072463768116, + "0PRIZA7CG2JAL9GTN265B.png": 0.9929577464788732, + "0FXLG8PO267BZPBBXIX4E.png": 0.9922480620155039, + "1B0LNAITDDPPCJ4I6XIWK.png": 0.9868421052631579, + "0YNQ2KZ01B1TWP9FR5DE7.png": 0.45360824742268047, + "0A8AVSZNK6GTNOCBEVFOY.png": 0.9722222222222222, + "0XM8RQF6JQDOTJ5WQVHFE.png": 0.9873417721518988, + "0JBU3LJRDTMJI2XGB6NUE.png": 0.9868421052631579, + "0FKIASN9E4KCZ0JRCAJLQ.png": 0.9917355371900827, + "0A2AT7TW5KOMUUAK7TQXT.png": 0.9882352941176471, + "0QISJETVE3HGF1PMBD1BM.png": 
0.9848484848484849, + "0KBOWWQLYSIZ0P4SIZMHJ.png": 0.993421052631579, + "0OMZO818L9AC4U3JJTKGD.png": 0.9863013698630136, + "0IVOAVCWOJ4CA92H7CM1Q.png": 0.9917355371900827, + "0SH9F7EHAT35OVT003OC5.png": 0.3728813559322034, + "0F7BJ4Z9F1R95HUG4RRZD.png": 0.9767441860465116, + "00RJGV4A4UTMTLDEIR1IG.png": 0.975609756097561, + "0BUP8L4PGVBNQE1GSCGJZ.png": 0.967741935483871, + "0B1QX4K8U8P9QA3HVLRPN.png": 0.9923664122137404, + "0IZ8M2UHYSA9H6K8XIOKS.png": 0.9855072463768116, + "0KLEV2650Z6X2DAUO94QK.png": 0.9876543209876543, + "0MRYJGMAVHEDMZ3XSX9XI.png": 0.9871794871794872, + "0I6PWVE3HEK6ZZ5K53UY4.png": 0.9818181818181818, + "1BE3I0HX6XWZQA4EFY99C.png": 0.984375, + "0M7CJCA8K3PX504PNHJRT.png": 0.9883720930232558, + "0ESACK4QILSDBXRS54UK0.png": 0.9795918367346939, + "0KLU5K631Q9RHQOY6771B.png": 0.4444444444444444, + "0RAZV12CY84ZGA4BRZQUC.png": 0.9871794871794872, + "0HZ4TDEJG6BY7B2RTALZK.png": 0.9868421052631579, + "0ROPMUV96VG8PTONLNGV9.png": 0.9887640449438202, + "0L194VI2NIOAX4AUCU2WG.png": 0.9767441860465116, + "0PG6K8IFJM2PHHLA1S4Y6.png": 0.9905660377358491, + "0H5AHQVKHAKQ1W636PLCS.png": 0.9878048780487805, + "0ZAHJJUMYDOQIMIUUFAUD.png": 0.9863013698630136, + "0MO39PWU9N82Y88WNANVM.png": 0.984375, + "0ZSUP0IMF3PK86DIVWQ8V.png": 0.967741935483871, + "0M1B6J5CTPBITI79C68MO.png": 0.9824561403508771, + "0BWJOYJSDHL1XJH6UG2RM.png": 0.9882352941176471, + "0SPYHIS3OEEZ082CFJEGF.png": 0.9871794871794872, + "0A3YX0911ULBZSCUBNDZS.png": 0.9896907216494846, + "0FK1CU21TAIHIR7YWZ2W7.png": 0.9818181818181818, + "0WP1ZBKQCK8W2W0ZXI2Z4.png": 0.7916666666666666, + "1AF02R419WL1YN97ZV144.png": 0.9767441860465116, + "0BKBFKJTQPLQBNIBZSM7E.png": 0.9916666666666667, + "1C1ZYGFL2YNFM2W3P2KN1.png": 0.9871794871794872, + "0IHOYC7KXLECI1F3G1WAF.png": 0.9848484848484849, + "1A0DA327P9Y532UTLHE2N.png": 0.9868421052631579, + "0SK9B35AHQ2OQA1RDKHHP.png": 0.9917355371900827, + "0EECJZYQ42MZLSWPOK9ZH.png": 0.9887640449438202, + "0UFBWJZOD5PBKMVX7G231.png": 0.9824561403508771, + 
"0OZ6DU5POAFSM589UXX4S.png": 0.9876543209876543, + "0OUIP8MTUSWLFQ6J13VXT.png": 0.967741935483871, + "0NFAI2Z8TAUKU6S7892KH.png": 0.975609756097561, + "0F3VUGWY35HLOJYHPT78G.png": 0.9883720930232558, + "0AYZOGNX998RYQVPWP1OA.png": 0.9846153846153847, + "0UC2QTKS4ITXYK4E6HU9T.png": 0.9939759036144579, + "0KK6YAU45B9B34SSZTAS7.png": 0.9836065573770492, + "0WV2Q54214D8ARYKCMBE0.png": 0.547945205479452, + "0TUDLFORB7K1BVA4U0ULU.png": 0.9917355371900827, + "0XZRML313QJ6X82YZJLYT.png": 0.9848484848484849, + "0ACY14LU0VWSKDOHEAVZM.png": 0.9873417721518988, + "0HH9NAZ1I95NJINORKJIM.png": 0.9795918367346939, + "1AWZPWR198XN7U8HY1E32.png": 0.9795918367346939, + "0TLG8NFY9BXHB15A47OGW.png": 0.9926470588235294, + "1CBFM7HG55Z7O8F4Y0O0L.png": 0.9848484848484849, + "0EV3WT6VJG3QH2HFJEIBA.png": 0.975609756097561, + "0OBPU21JDPO0KPYEQGLFO.png": 0.9722222222222222, + "0MJ27YD7XBYLQKM87RM3Y.png": 0.9887640449438202, + "0BHU2JO8ODKS3OL4RIU6A.png": 0.99, + "0WVB351NNWY8OOQQRRW6F.png": 0.476878612716763, + "1BCT1VG1R4HUK3Q6NMZGU.png": 0.9916666666666667, + "0YJ043WAWUTW4AEMDTD4R.png": 0.9782608695652174, + "0YS08VVMS1YPOHVJOFXXA.png": 0.98989898989899, + "0EWWFSOUCGGD5BK6RKMKO.png": 0.522875816993464, + "0VCTD6BP09MBAXOOM5Y5E.png": 0.975609756097561, + "0S7ZGBZ7OBI15CZS5V95A.png": 0.984375, + "0JJ9O2OQ6O13OAOFM7643.png": 0.99, + "00TXY79AHYWJ7WLXB3VLV.png": 0.9846153846153847, + "0J2UQ7WIZXFK4I5TV9UHW.png": 0.9935064935064936, + "0TYF1PBQCH64LANCKYWY7.png": 0.9859154929577465, + "0SWG2OW7F5RLADFAHJ9A4.png": 0.9882352941176471, + "0RV3TKC89HQD4FRFCTNSK.png": 0.9767441860465116, + "1BQBJ8UFLH7H3JQ965JF6.png": 0.9863013698630136, + "0C70JEJWPOAT1S8RUWCVB.png": 0.972972972972973, + "0RCE6GI0QYPCA15RH6HM7.png": 0.49382716049382713, + "0SB1QV5XRJM6W0HRU4AH7.png": 0.9891304347826086, + "0I1HQDO584A6ODC54PLNA.png": 0.9891304347826086, + "1AWHACFMS9KSHM18INN41.png": 0.9836065573770492, + "1BKBFKJTQPLQBNIBZSM7E.png": 0.9863013698630136, + "0T0Q44ALMC9WURWEESEMP.png": 0.9875, + 
"1A9RJA2I3YJT58JR2MEOT.png": 0.9615384615384616, + "0DD8FWYLADAY5EJ3UZUD9.png": 0.9868421052631579, + "0F078JDZMTC8C8H2P8IVA.png": 0.9921875, + "0L5KEP1L6K1ALH88LLMEY.png": 0.9795918367346939, + "0U2FXJ2H3K5SQTZNJ1WV1.png": 0.98, + "0U49K9QPO02GF77TU5JB8.png": 0.9863013698630136, + "0A9560NY0NQ5OVZQQBJRQ.png": 0.9836065573770492, + "0MRQ2DF27RW94C36QLLTZ.png": 0.9863013698630136, + "0BCT1VG1R4HUK3Q6NMZGU.png": 0.9795918367346939, + "0GQC64N9E830BWDTF8L0Q.png": 0.9910714285714286, + "0HIESCSLITYADXZHOO7IA.png": 0.989010989010989, + "0FZFGRN9B0WT3XCQMOVPJ.png": 0.9767441860465116, + "00LQMDL10JL253UW69YUO.png": 0.9818181818181818, + "0U79XK18POJ6HCLLOXS4Z.png": 0.9905660377358491, + "0I3RG6GXJ2VILV3BPFIY4.png": 0.9767441860465116, + "0X8PV0Z6SNEKPIPOCP5HR.png": 0.3931034482758621, + "0UFQOEKLIWTX65AY778BD.png": 0.5275590551181102, + "0HZUERFF8VNKXAZLV8RO5.png": 0.9850746268656716, + "0FWXHCMHZ7KG6WYRNWD6Q.png": 0.9922480620155039, + "0A9B6NHM7J57SCT1Z8TAS.png": 0.9861111111111112, + "0WOTQFWQFAEPN0HZ6MYIL.png": 0.9929577464788732, + "0IUNSDMCG8WWVJJ758NN9.png": 0.9887640449438202, + "0XLK4S5OWK77LRNU2JAG9.png": 0.46543778801843316, + "00FMSMFBJU5732FGUTLIF.png": 0.9821428571428571, + "0YOETJE558OS77GHG5L5U.png": 0.9876543209876543, + "0BKXE7HQJOJV0I1LL8YOF.png": 0.9821428571428571, + "0AWHACFMS9KSHM18INN41.png": 0.5416666666666667, + "1BL58Q9DLPBQF73ROGFDX.png": 0.9921875, + "0Q7EACO6OF8WQFZXI1MRQ.png": 0.9896907216494846, + "0R1IOV08YNRVC0KQS84EF.png": 0.9818181818181818, + "0SEF4O8YR8ULW23U32SE6.png": 0.9836065573770492, + "0IQGTS9QZK0ZYRL80GOSD.png": 0.9767441860465116, + "0E00IBZTY74DGR1SSX77L.png": 0.975609756097561, + "0BR0V61AWXYXVQSK6RMY7.png": 0.9911504424778761, + "0MESCFGQYOQNMVWD6B1VU.png": 0.9885057471264368, + "0F3GIMIL9E4UNWEFYLKGV.png": 0.9824561403508771, + "1A8AVSZNK6GTNOCBEVFOY.png": 0.9910714285714286, + "0E7WX1NX5ZKR24SEIUKRN.png": 0.9811320754716981, + "0QZOZCFYQ2TK5C0Q3KN5C.png": 0.5106382978723405, + "0SDC2B1I853GR50G545IX.png": 
0.9891304347826086, + "0FEKB24PHTZNT3KIZZVIS.png": 0.9876543209876543, + "0SIW9Q9NWY3TWRC712D4J.png": 0.9876543209876543, + "0JFFFUOFXDOLV2ZGQJAPB.png": 0.9887640449438202, + "0O976W9Y9NDSJ24YV7HU9.png": 0.975609756097561, + "0B0LNAITDDPPCJ4I6XIWK.png": 0.9811320754716981, + "1AAPDAAK73MRINE7PM0ZJ.png": 0.9852941176470589, + "0HC8F1RENJE297WV8RW0N.png": 0.45517241379310347, + "0OXJ4SWAYILOZVQCGO1OB.png": 0.9937106918238994, + "0I3S2Z8YWZ0JOIMKGU51B.png": 0.972972972972973, + "0Y0LZ2LRH7BR5ZDYBTH7U.png": 0.9824561403508771, + "0T0LAS5REAE827IQO0Q9U.png": 0.98989898989899, + "1AQ9EL10BYBSGJO2RLC6Q.png": 0.9868421052631579, + "0L1YL688ZRRPYAJ07UOFQ.png": 0.9911504424778761, + "1BWJOYJSDHL1XJH6UG2RM.png": 0.9922480620155039, + "0TELO9B7QI0QQVFMJXAQ1.png": 0.9896907216494846, + "0XDX2OT3OG575I0U99YAQ.png": 0.54, + "0X49B57NNHU6FEB4J21VY.png": 0.993006993006993, + "0DHJ8WY2XLWKG7K345LAK.png": 0.975609756097561, + "0BQBJ8UFLH7H3JQ965JF6.png": 0.989010989010989, + "00CBN2MRTC48ZY50RUSBW.png": 0.9767441860465116, + "1D7CMRTBBENLYDO7EWWVZ.png": 0.9863013698630136, + "0BX1I2HS6BLV92NZHV6J1.png": 0.9722222222222222, + "0XQE375V4J34MLJYN711T.png": 0.9722222222222222, + "1BKXE7HQJOJV0I1LL8YOF.png": 0.9795918367346939, + "0E3OA2PY1K3B44GN9AS0Y.png": 0.9863013698630136, + "0SCRALC3GPIO2ZD918U8L.png": 0.478021978021978, + "0ITKDLWB7SDGMM8980ZSS.png": 0.9911504424778761, + "1BYRMKANKN4PL6JFPG8AR.png": 0.989010989010989, + "0BYRMKANKN4PL6JFPG8AR.png": 0.9803921568627451, + "0R8W6O2N25AVQI9FQ5IL7.png": 0.972972972972973, + "0J9TV59N7U65CB7YCHD38.png": 0.9922480620155039, + "0VX41MM59ET2MK09202C3.png": 0.9896907216494846, + "1CGP5R7FMVCKR47XK6IVA.png": 0.9896907216494846, + "1BSXNNN0LA94101P5D38I.png": 0.9926470588235294, + "0UMVEM9RUVZDRJRFA1W2V.png": 0.9722222222222222, + "0KPHJHUXB0MS3B9RHL57O.png": 0.9868421052631579, + "0TYH6IN161KXZT369VVWQ.png": 0.9795918367346939, + "0AUTW1OL7IAPO1JH1TQUR.png": 0.984375, + "0GNCKEB99NZ0J9GCAI0TH.png": 0.9850746268656716, + 
"0CGP5R7FMVCKR47XK6IVA.png": 0.9915966386554622, + "0NNLAUZDCGVKZP852ZJ7X.png": 0.9836065573770492, + "0EJW9DEXTHUR17CZCUPB1.png": 0.9850746268656716, + "0JZRIWIFSATGGFL8P0NZF.png": 0.9908256880733946, + "0VNHMSVYYS2Q0H0VJDNAK.png": 0.9782608695652174, + "0Z2ZZWW84O21E70F5RGIA.png": 0.993103448275862, + "0UODYVKUWDGD6S5D7LNAW.png": 0.9930555555555556, + "0NGNPB7KAJSSKSHQV1KZS.png": 0.9767441860465116, + "0Q3C8N8G8GXV2EP88XEXI.png": 0.9795918367346939, + "0IU89E255WY0KPUD6L7Y9.png": 0.9902912621359223, + "0F22CQYG638LSZROETJ9V.png": 0.9904761904761905, + "0HBX9X0EJVVL4TA9CJ25G.png": 0.9873417721518988, + "0IRDSID7UDBLOIRB9JQ9S.png": 0.9883720930232558, + "0IHCMVD5NO41KSAB3ODC0.png": 0.5213270142180095, + "1AYZOGNX998RYQVPWP1OA.png": 0.9931506849315068, + "0WM2Y66O2ZJA831TN2E7Z.png": 0.9615384615384616, + "0CPW27F5C8I03UQBVBL2Y.png": 0.42500000000000004, + "1BR0V61AWXYXVQSK6RMY7.png": 0.989010989010989, + "0JJPRMSYFQLJKD3JYA1JP.png": 0.9850746268656716, + "0RT937QPOOWU9LKZVU0G3.png": 0.9922480620155039, + "0EFBK546D496KI033ACDF.png": 0.972972972972973, + "0EP1D1EXZC4VOMGZJGQQT.png": 0.9891304347826086, + "0DSQ4IAVY32EHCJ0AJM1Y.png": 0.9824561403508771, + "0F4HFOUP4374O8RL4E914.png": 0.9824561403508771, + "0IMS5FXCTVU6GSCR5CHTK.png": 0.984375, + "0P82SO3E98ECMRNRS62D4.png": 0.9868421052631579, + "1AUTW1OL7IAPO1JH1TQUR.png": 0.9911504424778761, + "0WFIWI83FBAOLU16M27NL.png": 0.9939024390243902, + "1A9B6NHM7J57SCT1Z8TAS.png": 0.9859154929577465, + "0UU3AG1PSZ1H78B6J17PA.png": 0.9882352941176471, + "0UVW81GETVKT5GPM6ZX0S.png": 0.9803921568627451, + "0IKFXKSQ9OA3OCRGQBZFI.png": 0.9795918367346939, + "1C70JEJWPOAT1S8RUWCVB.png": 0.9803921568627451, + "0A7ZU26KX6C0LG0D3T3ZS.png": 0.9818181818181818, + "0BL58Q9DLPBQF73ROGFDX.png": 0.9937106918238994, + "0UQWQMAYVXUFY65GH4ION.png": 0.9836065573770492, + "0R77TU5P7A0F1YTLIGSOA.png": 0.9863013698630136, + "0Q740R8QE6ZAF034ZMGQG.png": 0.9917355371900827, + "1BG5K95UCWQ3JXWC501XA.png": 0.9926470588235294, + 
"1CPW27F5C8I03UQBVBL2Y.png": 0.9916666666666667 + } +} \ No newline at end of file diff --git a/resources/benchmarks/tesseract_benchmark_sage-correction.txt b/resources/benchmarks/tesseract_benchmark_sage-correction.txt new file mode 100644 index 00000000..f75ea71e --- /dev/null +++ b/resources/benchmarks/tesseract_benchmark_sage-correction.txt @@ -0,0 +1,359 @@ +Tesseract version is 5.0.0 +Correction step: _sage-correction + +Table 1 - Accuracy for each file ++---------------+---------------------+-------+-----------------+--------------+ +| Dataset | Image name | --psm | Amount of words | Accuracy OCR | ++===============+=====================+=======+=================+==============+ +| english-words | Kaspersky | 6 | 111 | 99.300 | ++---------------+---------------------+-------+-----------------+--------------+ +| english-words | USB | 6 | 4 | 80.900 | ++---------------+---------------------+-------+-----------------+--------------+ +| english-words | words1 | 6 | 19 | 100 | ++---------------+---------------------+-------+-----------------+--------------+ +| english-words | words2 | 6 | 9 | 100 | ++---------------+---------------------+-------+-----------------+--------------+ +| english-words | words3 | 6 | 9 | 100 | ++---------------+---------------------+-------+-----------------+--------------+ +| low_quality | 0 | 4 | 315 | 94.500 | ++---------------+---------------------+-------+-----------------+--------------+ +| low_quality | 1 | 4 | 308 | 94.600 | ++---------------+---------------------+-------+-----------------+--------------+ +| low_quality | 2 | 4 | 238 | 96.600 | ++---------------+---------------------+-------+-----------------+--------------+ +| low_quality | 3 | 4 | 313 | 96.900 | ++---------------+---------------------+-------+-----------------+--------------+ +| low_quality | 4 | 4 | 218 | 94.100 | ++---------------+---------------------+-------+-----------------+--------------+ +| low_quality | 5 | 4 | 291 | 94 | 
++---------------+---------------------+-------+-----------------+--------------+ +| low_quality | 6 | 4 | 268 | 95.200 | ++---------------+---------------------+-------+-----------------+--------------+ +| low_quality | 7 | 4 | 390 | 95.100 | ++---------------+---------------------+-------+-----------------+--------------+ +| low_quality | 8 | 4 | 117 | 94 | ++---------------+---------------------+-------+-----------------+--------------+ +| low_quality | 9 | 4 | 294 | 97.900 | ++---------------+---------------------+-------+-----------------+--------------+ +| others | Zaklyuchenie_nevrol | 4 | 525 | 83 | +| | oga_00 | | | | ++---------------+---------------------+-------+-----------------+--------------+ +| others | Zaklyuchenie_nevrol | 4 | 241 | 87 | +| | oga_01 | | | | ++---------------+---------------------+-------+-----------------+--------------+ +| others | napalm_doc_2_2_6 | 4 | 124 | 85 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | 1.620e+14 | 4 | 695 | 99.600 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | 1.620e+14 | 4 | 696 | 99.600 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | 1.620e+14 | 4 | 699 | 99.400 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | article_multiline | 4 | 471 | 99.900 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | fstek17_00 | 4 | 192 | 92.300 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | fstek17_01 | 4 | 332 | 99.600 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | law_image | 4 | 182 | 99.500 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | napalm_doc_13_2 | 4 | 243 | 96.700 | 
++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | ukaz_prezidenta_1 | 4 | 264 | 98.800 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | ukodeksrf_00 | 4 | 287 | 99.900 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | ukodeksrf_01 | 4 | 340 | 99.500 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | with_applications_0 | 4 | 146 | 94.400 | +| | 0 | | | | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | with_applications_0 | 4 | 276 | 98.800 | +| | 1 | | | | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | with_applications_0 | 4 | 165 | 98.500 | +| | 2 | | | | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | with_applications_0 | 4 | 90 | 99.400 | +| | 3 | | | | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | ТЗ_00 | 4 | 78 | 97.400 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | ТЗ_01 | 4 | 296 | 98 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | ТЗ_02 | 4 | 309 | 98.800 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | ТЗ_03 | 4 | 337 | 98.300 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | ТЗ_04 | 4 | 257 | 96.300 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | ТЗ_05 | 4 | 238 | 97.800 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | ТЗ_06 | 4 | 219 | 93.500 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | ТЗ_07 | 4 | 233 | 98.500 | 
++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | ТЗ_08 | 4 | 284 | 95.900 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | ТЗ_09 | 4 | 154 | 97.600 | ++---------------+---------------------+-------+-----------------+--------------+ + +Table 2 - AVG by each type of symbols: ++--------+--------+--------+--------+--------+--------+--------+-------+-------+ +| Datase | ASCII_ | ASCII_ | ASCII_ | ASCII_ | Latin1 | Cyrill | Amoun | AVG A | +| t | Spacin | Specia | Digits | Upperc | _Speci | ic | t of | ccura | +| | g_Char | l_Symb | | ase_Ch | al_Sym | | words | cy | +| | s | ols | | ars | bols | | | | ++========+========+========+========+========+========+========+=======+=======+ +| englis | 94.820 | 99.333 | 100 | 0 | 0 | 94.540 | 152 | 96.04 | +| h- | | | | | | | | 0 | +| words | | | | | | | | | ++--------+--------+--------+--------+--------+--------+--------+-------+-------+ +| low_qu | 99.190 | 75.340 | 94.544 | 0 | 0 | 97.640 | 2752 | 95.29 | +| ality | | | | | | | | 0 | ++--------+--------+--------+--------+--------+--------+--------+-------+-------+ +| others | 89.767 | 77.100 | 89.533 | 0 | 0 | 86.433 | 890 | 85 | ++--------+--------+--------+--------+--------+--------+--------+-------+-------+ +| tz-npa | 98.956 | 90.920 | 92.104 | 0 | 0 | 99.488 | 7483 | 97.92 | +| | | | | | | | | 0 | ++--------+--------+--------+--------+--------+--------+--------+-------+-------+ + +Table 3 -OCR error by symbol: ++--------+---------------------------------------------------------------------+ +| Symbol | Cnt Errors & Correct-Generated | ++========+=====================================================================+ +| | ['3 & <. №> -> < No>', '2 & < 2> -> ', '2 & < г> -> <К>', '2 & < | +| | ‚> -> <,>', "2 & <1 > -> <'>", '2 & <и > -> <н>', '2 & <№ > -> | +| | '] | ++--------+---------------------------------------------------------------------+ +| . | ['54 & <.> -> <,>', '3 & <. 
№> -> < No>', '3 & <3.> -> < De>', '3 & | +| | <В.В> -> ', '2 & <Г.> -> <С>', '2 & <г.> -> <ГТ>', '2 & <п.> -> | +| | <,>'] | ++--------+---------------------------------------------------------------------+ +| , | ['80 & <,> -> <.>', '3 & <ва,> -> <нь>', '1 & <,> -> <»>'] | ++--------+---------------------------------------------------------------------+ +| е | ['6 & <не> -> ', '4 & <е> -> <ё>', '3 & <все> -> <Ко>', '3 & | +| | <ге> -> <Кри>', '3 & <е-> -> <бов>', '3 & <е> -> <а>', '3 & <цев> | +| | -> ', '3 & <че-> -> <и»>', '2 & <е> -> <и>', '2 & <е> -> | +| | <ми>', '2 & <е> -> <с>', '2 & <ее> -> ', '2 & <ле> -> <У>', '1 | +| | & <е> -> <Е>', '1 & <е> -> <о>'] | ++--------+---------------------------------------------------------------------+ +| о | ['6 & <то> -> ', '3 & <По> -> ', '3 & <Про> -> <Ис>', '3 & | +| | <но> -> ', '3 & <она> -> ', '3 & <под> -> ', '3 & | +| | <фок> -> <М>', '2 & <во> -> <за>', '2 & <до> -> ', '2 & <до> -> | +| | ', '2 & <о> -> <ак>', '2 & <о> -> <у>', '2 & <об> -> <бы>', '2 | +| | & <по> -> <10>', '2 & <то> -> ', '1 & <о> -> <в>', '1 & <о> -> | +| | <я>'] | ++--------+---------------------------------------------------------------------+ +| а | ['5 & <а> -> <о>', '4 & <на> -> ', '3 & <Нам> -> ', '3 & | +| | <а> -> <ёту>', '3 & <ва,> -> <нь>', '3 & <на> -> <под>', '3 & <она> | +| | -> ', '3 & <рак> -> <Ли>', '3 & <сан> -> <еви>', '3 & <так> -> | +| | ', '2 & <Ла> -> <А>', '2 & <а> -> <ся>', '2 & <ва> -> <к>', '2 | +| | & <на> -> ', '1 & <а> -> <Б>', '1 & <а> -> <е>', '1 & <а> -> | +| | <у>', '1 & <а> -> <ы>', '1 & <а> -> <ь>'] | ++--------+---------------------------------------------------------------------+ +| н | ['6 & <не> -> ', '4 & <на> -> ', '3 & <на> -> <под>', '3 & | +| | <но> -> ', '3 & <она> -> ', '3 & <сан> -> <еви>', '2 & | +| | <йн> -> <ем>', '2 & <н> -> <п>', '2 & <на> -> ', '2 & <нк> -> | +| | <х>', '2 & <ны> -> <им>', '1 & <н> -> <Н>', '1 & <н> -> <и>', '1 & | +| | <н> -> <й>', '1 & <н> -> <л>', '1 & <н> -> <м>', '1 & 
<н> -> <ф>'] | ++--------+---------------------------------------------------------------------+ +| и | ['4 & <и> -> <е>', '3 & <ив> -> <ьюж>', '3 & <тип> -> ', '3 & | +| | <ции> -> <узы>', '2 & <и > -> <н>', '2 & <и> -> <10>', '2 & <и> -> | +| | <ей>', '2 & <и> -> <мм>', '2 & <ис> -> <не>', '2 & <их> -> ', | +| | '2 & <их> -> ', '2 & <си> -> <ен>', '1 & <и> -> <В>', '1 & <и> | +| | -> <а>', '1 & <и> -> <с>', '1 & <и> -> <ь>'] | ++--------+---------------------------------------------------------------------+ +| - | ['8 & <-> -> <но>', '6 & <-> -> <ния>', '5 & <-> -> <в>', '3 & <-> | +| | -> <жья>', '3 & <-> -> <ков>', '3 & <-> -> <нил>', '3 & <-> -> | +| | <щим>', '3 & <е-> -> <бов>', '3 & <че-> -> <и»>', '2 & <-> -> | +| | <ве>', '2 & <-> -> <да>', '2 & <-> -> <ие>', '2 & <-> -> <ко>', '2 | +| | & <-> -> <ли>', '2 & <-> -> <м">', '2 & <-> -> <м>', '2 & <-> -> | +| | <мо>', '2 & <-> -> <ны>', '2 & <-> -> <ры>', '2 & <-> -> <ых>', '2 | +| | & <-> -> <“>', '2 & <у-> -> <ем>', '2 & <ы-> -> <им>', '2 & <ы-> -> | +| | <ём>', '1 & <-> -> <">', '1 & <-> -> <»>', '1 & <-> -> <д>', '1 & | +| | <-> -> <л>', '1 & <-> -> <н>', '1 & <-> -> <ы>'] | ++--------+---------------------------------------------------------------------+ +| 1 | ["4 & <1> -> <'>", '4 & <1С> -> ', '3 & <1> -> <3>', '3 & <№1> | +| | -> ', '3 & <№1»> -> ', "2 & <1 > -> <'>", '2 & <1C> -> | +| | ', '2 & <1C> -> <С>', '2 & <1> -> <2>', '2 & <1> -> ', '1 & | +| | <1> -> ', '1 & <1> -> <5>', '1 & <1> -> <Г>', '1 & <1> -> <С>', | +| | '1 & <1> -> <Т>'] | ++--------+---------------------------------------------------------------------+ +| № | ['94 & <№> -> ', '6 & <№> -> ', '3 & <. 
№> -> < No>', '3 & | +| | <№1> -> ', '3 & <№1»> -> ', '2 & <№ > -> '] | ++--------+---------------------------------------------------------------------+ +| в | ['4 & <в> -> <6>', '3 & <ва,> -> <нь>', '3 & <все> -> <Ко>', '3 & | +| | <ив> -> <ьюж>', '3 & <ств> -> <У н>', '3 & <цев> -> ', '2 & | +| | <в> -> <«В>', '2 & <в> -> <зм>', '2 & <в> -> <м>', '2 & <в> -> | +| | <по>', '2 & <ва> -> <к>', '2 & <во> -> <за>', '1 & <в> -> ', '1 | +| | & <в> -> <В>', '1 & <в> -> <г>', '1 & <в> -> <н>'] | ++--------+---------------------------------------------------------------------+ +| с | ['3 & <все> -> <Ко>', '3 & <сан> -> <еви>', '3 & <ств> -> <У н>', | +| | '2 & <ис> -> <не>', '2 & <с> -> <Не>', '2 & <с> -> <От>', '2 & <си> | +| | -> <ен>', '1 & <с> -> ', '1 & <с> -> <б>', '1 & <с> -> <н>'] | ++--------+---------------------------------------------------------------------+ +| т | ['6 & <то> -> ', '3 & <ств> -> <У н>', '3 & <так> -> ', '3 | +| | & <тип> -> ', '2 & <т> -> <г>', '2 & <то> -> ', '1 & <т> | +| | -> <Д>', '1 & <т> -> <Т>', '1 & <т> -> <м>'] | ++--------+---------------------------------------------------------------------+ +| л | ['2 & <зл> -> <им>', '2 & <ле> -> <У>', '1 & <л> -> ', '1 & <л> | +| | -> <Л>', '1 & <л> -> <д>', '1 & <л> -> <т>'] | ++--------+---------------------------------------------------------------------+ +| р | ['3 & <Про> -> <Ис>', '3 & <гр> -> <тав>', '3 & <рак> -> <Ли>', '2 | +| | & <гр> -> ', '2 & <р> -> <ал>'] | ++--------+---------------------------------------------------------------------+ +| 2 | ['2 & < 2> -> ', '2 & <28> -> ', '2 & <28> -> <ИР>', '2 & | +| | <28> -> <Я>'] | ++--------+---------------------------------------------------------------------+ +| д | ['3 & <д> -> <Пен>', '3 & <под> -> ', '2 & <до> -> ', '2 & | +| | <до> -> ', '1 & <д> -> <Т>', '1 & <д> -> <Ц>'] | ++--------+---------------------------------------------------------------------+ +| г | ['3 & <ге> -> <Кри>', '3 & <гр> -> <тав>', '2 & < г> -> <К>', '2 & | +| 
| <г.> -> <ГТ>', '2 & <г> -> <т>', '2 & <гр> -> '] | ++--------+---------------------------------------------------------------------+ +| 3 | ['3 & <3.> -> < De>', '1 & <3> -> <">', '1 & <3> -> '] | ++--------+---------------------------------------------------------------------+ +| С | ['6 & <СЗВ> -> ', '4 & <1С> -> ', '3 & <ОС> -> ', '3 | +| | & <С> -> ', '2 & <ОС> -> '] | ++--------+---------------------------------------------------------------------+ +| N | ['22 & -> <М>'] | ++--------+---------------------------------------------------------------------+ +| А | ['2 & <А> -> ', '2 & <А> -> <Ли>'] | ++--------+---------------------------------------------------------------------+ +| И | ['2 & <И> -> <АН>', '1 & <И> -> <В>', '1 & <И> -> <Й>'] | ++--------+---------------------------------------------------------------------+ +| п | ['3 & <под> -> ', '3 & <тип> -> ', '2 & <п.> -> <,>', '2 | +| | & <п> -> <и >', '2 & <п> -> <л>', '2 & <по> -> <10>', '1 & <п> -> | +| | <П>'] | ++--------+---------------------------------------------------------------------+ +| к | ['3 & <рак> -> <Ли>', '3 & <так> -> ', '3 & <фок> -> <М>', '2 | +| | & <нк> -> <х>'] | ++--------+---------------------------------------------------------------------+ +| у | ['3 & <у> -> <ы>', '2 & <у-> -> <ем>'] | ++--------+---------------------------------------------------------------------+ +| Н | ['3 & <Нам> -> ', '2 & <Н> -> <ЕМ>', '1 & <Н> -> <И>'] | ++--------+---------------------------------------------------------------------+ +| Е | ['2 & <ЕМ> -> <Ш>'] | ++--------+---------------------------------------------------------------------+ +| О | ['3 & <ОС> -> ', '2 & <ОС> -> ', '2 & <Об> -> <06>', '1 & | +| | <О> -> ', '1 & <О> -> <Ю>', '1 & <О> -> <о>'] | ++--------+---------------------------------------------------------------------+ +| П | ['3 & <По> -> ', '3 & <Про> -> <Ис>', '2 & <П> -> <И>', '1 & | +| | <П> -> <К>', '1 & <П> -> <п>'] | 
++--------+---------------------------------------------------------------------+ +| б | ['3 & <"б"> -> <“8”>', '2 & <Об> -> <06>', '2 & <б> -> <«Л>', '2 & | +| | <об> -> <бы>'] | ++--------+---------------------------------------------------------------------+ +| ы | ['2 & <ны> -> <им>', '2 & <ы-> -> <им>', '2 & <ы-> -> <ём>', '1 & | +| | <ы> -> <б>', '1 & <ы> -> <е>'] | ++--------+---------------------------------------------------------------------+ +| ; | ['9 & <;> -> <:>', '1 & <;> -> <,>', '1 & <;> -> <.>'] | ++--------+---------------------------------------------------------------------+ +| Т | ['3 & <МРТ> -> ', '3 & <Т> -> <Г>', '3 & <ТЗР> -> '] | ++--------+---------------------------------------------------------------------+ +| м | ['3 & <Нам> -> '] | ++--------+---------------------------------------------------------------------+ +| В | ['6 & <СЗВ> -> ', '3 & <В.В> -> ', '2 & <ВЗ> -> <РИ>'] | ++--------+---------------------------------------------------------------------+ +| 0 | ['3 & <608> -> '] | ++--------+---------------------------------------------------------------------+ +| I | ['3 & -> <Ш>', '3 & -> <УП>', '1 & -> '] | ++--------+---------------------------------------------------------------------+ +| М | ['3 & <МРТ> -> ', '2 & <ЕМ> -> <Ш>'] | ++--------+---------------------------------------------------------------------+ +| 6 | ['3 & <608> -> '] | ++--------+---------------------------------------------------------------------+ +| Р | ['3 & <МРТ> -> ', '3 & <ТЗР> -> '] | ++--------+---------------------------------------------------------------------+ +| ц | ['3 & <цев> -> ', '3 & <ции> -> <узы>', '2 & <ц> -> <С>', '1 & | +| | <ц> -> <щ>'] | ++--------+---------------------------------------------------------------------+ +| Л | ['2 & <Ла> -> <А>'] | ++--------+---------------------------------------------------------------------+ +| 5 | ['2 & <75> -> <2>'] | 
++--------+---------------------------------------------------------------------+ +| з | ['2 & <зл> -> <им>'] | ++--------+---------------------------------------------------------------------+ +| 8 | ['3 & <608> -> ', '2 & <28> -> ', '2 & <28> -> <ИР>', '2 & | +| | <28> -> <Я>'] | ++--------+---------------------------------------------------------------------+ +| й | ['2 & <й> -> <е:>', '2 & <йн> -> <ем>'] | ++--------+---------------------------------------------------------------------+ +| " | ['3 & <"б"> -> <“8”>', '2 & <"> -> <“>', '1 & <"> -> <”>'] | ++--------+---------------------------------------------------------------------+ +| 7 | ['2 & <75> -> <2>'] | ++--------+---------------------------------------------------------------------+ +| E | ['3 & -> <ЕВР>'] | ++--------+---------------------------------------------------------------------+ +| З | ['6 & <СЗВ> -> ', '3 & <БЗ> -> <653>', '3 & <ТЗР> -> ', | +| | '2 & <ВЗ> -> <РИ>'] | ++--------+---------------------------------------------------------------------+ +| ч | ['3 & <че-> -> <и»>'] | ++--------+---------------------------------------------------------------------+ +| : | ['2 & <:> -> '] | ++--------+---------------------------------------------------------------------+ +| [ | ['2 & <[> -> <(>'] | ++--------+---------------------------------------------------------------------+ +| ] | ['2 & <]> -> <)>'] | ++--------+---------------------------------------------------------------------+ +| 4 | ['1 & <4> -> <“>'] | ++--------+---------------------------------------------------------------------+ +| C | ['2 & <1C> -> ', '2 & <1C> -> <С>', '2 & -> <С>'] | ++--------+---------------------------------------------------------------------+ +| Б | ['3 & <БЗ> -> <653>'] | ++--------+---------------------------------------------------------------------+ +| Д | ['1 & <Д> -> <З>'] | ++--------+---------------------------------------------------------------------+ +| | | ['1 & <|> -> <1>'] | 
++--------+---------------------------------------------------------------------+ +| Ц | ['1 & <Ц> -> <Т>'] | ++--------+---------------------------------------------------------------------+ +| ш | ['2 & <ш> -> <«Ч>', '1 & <ш> -> <ч>'] | ++--------+---------------------------------------------------------------------+ +| P | ['3 & -> <ЕВР>'] | ++--------+---------------------------------------------------------------------+ +| R | ['3 & -> <ЕВР>'] | ++--------+---------------------------------------------------------------------+ +| a | ['4 & -> <на>', '1 & -> <а>'] | ++--------+---------------------------------------------------------------------+ +| х | ['2 & <их> -> ', '2 & <их> -> '] | ++--------+---------------------------------------------------------------------+ +| — | ['1 & <—> -> <->'] | ++--------+---------------------------------------------------------------------+ +| G | ['2 & -> <С>'] | ++--------+---------------------------------------------------------------------+ +| H | ['4 & -> <на>', '2 & -> <Из>'] | ++--------+---------------------------------------------------------------------+ +| V | ['3 & -> <УП>'] | ++--------+---------------------------------------------------------------------+ +| m | ['2 & -> '] | ++--------+---------------------------------------------------------------------+ +| ф | ['3 & <фок> -> <М>', '1 & <ф> -> <Ф>'] | ++--------+---------------------------------------------------------------------+ +| ю | ['1 & <ю> -> <у>'] | ++--------+---------------------------------------------------------------------+ +| c | ['2 & -> <со>', '1 & -> <с>'] | ++--------+---------------------------------------------------------------------+ +| o | ['2 & -> <со>', '2 & -> '] | ++--------+---------------------------------------------------------------------+ +| Ю | ['2 & <Ю> -> <1 >'] | ++--------+---------------------------------------------------------------------+ +| ‚ | ['2 & < ‚> -> <,>'] | 
++--------+---------------------------------------------------------------------+ +| Y | ['1 & -> <У>'] | ++--------+---------------------------------------------------------------------+ +| _ | ['1 & <_> -> <Х>'] | ++--------+---------------------------------------------------------------------+ +| d | ['1 & -> <4>'] | ++--------+---------------------------------------------------------------------+ +| e | ['2 & -> <Из>'] | ++--------+---------------------------------------------------------------------+ +| x | ['1 & -> <х>'] | ++--------+---------------------------------------------------------------------+ +| y | ['1 & -> <у>'] | ++--------+---------------------------------------------------------------------+ +| » | ['3 & <№1»> -> '] | ++--------+---------------------------------------------------------------------+ +| Г | ['2 & <Г.> -> <С>'] | ++--------+---------------------------------------------------------------------+ \ No newline at end of file diff --git a/resources/benchmarks/tesseract_benchmark_textblob-correction.txt b/resources/benchmarks/tesseract_benchmark_textblob-correction.txt new file mode 100644 index 00000000..2de957a5 --- /dev/null +++ b/resources/benchmarks/tesseract_benchmark_textblob-correction.txt @@ -0,0 +1,318 @@ +Tesseract version is 4.1.1 +Correction step: _textblob-correction + +Table 1 - Accuracy for each file ++---------------+---------------------+-------+-----------------+--------------+ +| Dataset | Image name | --psm | Amount of words | Accuracy OCR | ++===============+=====================+=======+=================+==============+ +| english-words | Kaspersky | 6 | 111 | 73.400 | ++---------------+---------------------+-------+-----------------+--------------+ +| english-words | USB | 6 | 4 | 47.600 | ++---------------+---------------------+-------+-----------------+--------------+ +| english-words | words1 | 6 | 19 | 66.900 | ++---------------+---------------------+-------+-----------------+--------------+ +| english-words 
| words2 | 6 | 9 | 72.700 | ++---------------+---------------------+-------+-----------------+--------------+ +| english-words | words3 | 6 | 9 | 61.800 | ++---------------+---------------------+-------+-----------------+--------------+ +| others | Zaklyuchenie_nevrol | 4 | 525 | 80.200 | +| | oga_00 | | | | ++---------------+---------------------+-------+-----------------+--------------+ +| others | Zaklyuchenie_nevrol | 4 | 241 | 87 | +| | oga_01 | | | | ++---------------+---------------------+-------+-----------------+--------------+ +| others | napalm_doc_2_2_6 | 4 | 124 | 84.400 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | 1.620e+14 | 4 | 695 | 98.100 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | 1.620e+14 | 4 | 696 | 98.100 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | 1.620e+14 | 4 | 699 | 97.900 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | article_multiline | 4 | 471 | 98.400 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | fstek17_00 | 4 | 192 | 91.700 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | fstek17_01 | 4 | 332 | 97.500 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | law_image | 4 | 182 | 99 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | napalm_doc_13_2 | 4 | 243 | 95.900 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | ukaz_prezidenta_1 | 4 | 264 | 97.100 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | ukodeksrf_00 | 4 | 287 | 98.200 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | ukodeksrf_01 | 4 | 340 | 97.200 | 
++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | with_applications_0 | 4 | 146 | 94.900 | +| | 0 | | | | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | with_applications_0 | 4 | 276 | 98.700 | +| | 1 | | | | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | with_applications_0 | 4 | 165 | 98.700 | +| | 2 | | | | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | with_applications_0 | 4 | 90 | 99.100 | +| | 3 | | | | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | ТЗ_00 | 4 | 78 | 91.900 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | ТЗ_01 | 4 | 296 | 94.600 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | ТЗ_02 | 4 | 309 | 96.700 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | ТЗ_03 | 4 | 337 | 95.500 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | ТЗ_04 | 4 | 257 | 94.600 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | ТЗ_05 | 4 | 238 | 96.600 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | ТЗ_06 | 4 | 219 | 95.800 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | ТЗ_07 | 4 | 233 | 96.400 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | ТЗ_08 | 4 | 284 | 94.300 | ++---------------+---------------------+-------+-----------------+--------------+ +| tz-npa | ТЗ_09 | 4 | 154 | 93.700 | ++---------------+---------------------+-------+-----------------+--------------+ + +Table 2 - AVG by each type of symbols: 
++--------+--------+--------+--------+--------+--------+--------+-------+-------+ +| Datase | ASCII_ | ASCII_ | ASCII_ | ASCII_ | Latin1 | Cyrill | Amoun | AVG A | +| t | Spacin | Specia | Digits | Upperc | _Speci | ic | t of | ccura | +| | g_Char | l_Symb | | ase_Ch | al_Sym | | words | cy | +| | s | ols | | ars | bols | | | | ++========+========+========+========+========+========+========+=======+=======+ +| englis | 100 | 99.333 | 100 | 0 | 0 | 60.680 | 152 | 64.48 | +| h- | | | | | | | | 0 | +| words | | | | | | | | | ++--------+--------+--------+--------+--------+--------+--------+-------+-------+ +| others | 90.767 | 80.167 | 90.700 | 0 | 0 | 83.400 | 890 | 83.86 | +| | | | | | | | | 7 | ++--------+--------+--------+--------+--------+--------+--------+-------+-------+ +| tz-npa | 99.328 | 91.692 | 85.916 | 0 | 0 | 97.300 | 7483 | 96.42 | +| | | | | | | | | 4 | ++--------+--------+--------+--------+--------+--------+--------+-------+-------+ + +Table 3 -OCR error by symbol: ++--------+---------------------------------------------------------------------+ +| Symbol | Cnt Errors & Correct-Generated | ++========+=====================================================================+ +| о | ['198 & <по> -> ', '118 & <от> -> ', '46 & <об> -> ', | +| | '12 & <во> -> ', '12 & <то> -> ', '10 & <до> -> ', '8 & | +| | <со> -> ', '4 & <По> -> ', '4 & <Со> -> ', '4 & <но> -> | +| | ', '4 & <он> -> ', '3 & <и о> -> ', '2 & <го> -> ', | +| | '2 & <по> -> '] | ++--------+---------------------------------------------------------------------+ +| н | ['246 & <на> -> ', '92 & <не> -> ', '4 & <но> -> ', '4 | +| | & <он> -> ', '2 & <нa> -> ', '2 & <н> -> <и>', '2 & <на> -> | +| | '] | ++--------+---------------------------------------------------------------------+ +| а | ['246 & <на> -> ', '56 & <за> -> ', '6 & <На> -> ', '4 | +| | & <За> -> ', '2 & <на> -> ', '2 & <ра> -> '] | ++--------+---------------------------------------------------------------------+ +| е | ['92 & <не> -> 
', '12 & <ед> -> ', '6 & <ее> -> ', '4 & | +| | <Не> -> ', '3 & <е> -> <с>', '3 & <пер> -> ', '2 & <же> -> | +| | ', '2 & <те> -> ', '1 & <е> -> <а>'] | ++--------+---------------------------------------------------------------------+ +| т | ['118 & <от> -> ', '36 & <ст> -> ', '12 & <то> -> ', '6 | +| | & <т> -> <г>', '6 & <ти> -> < of>', '3 & <От > -> ', '3 & <тип> | +| | -> ', '2 & <рт> -> ', '2 & <те> -> '] | ++--------+---------------------------------------------------------------------+ +| п | ['198 & <по> -> ', '3 & <Тип> -> ', '3 & <пер> -> ', | +| | '3 & <тип> -> ', '2 & <п> -> <и>', '2 & <по> -> ', '1 & | +| | <п> -> <н>'] | ++--------+---------------------------------------------------------------------+ +| | ['3 & <От > -> ', '3 & <и о> -> ', '3 & <с 6> -> ', '2 | +| | & <. > -> < ‘>', "2 & <1 > -> <'>", '2 & -> <№>'] | ++--------+---------------------------------------------------------------------+ +| 1 | ['104 & <1С> -> ', '18 & <1C> -> ', '8 & <1С> -> ', '4 | +| | & <11> -> <И>', '4 & <1C> -> ', "3 & <1> -> <'>", "2 & <1 > -> | +| | <'>", '2 & <1C> -> ', '2 & <1C> -> <С>', '2 & <1> -> <|>', '2 & | +| | <31> -> ', '1 & <1> -> <\\>'] | ++--------+---------------------------------------------------------------------+ +| и | ['34 & <из> -> ', '32 & <их> -> ', '12 & <им> -> ', '6 | +| | & <ти> -> < of>', '3 & <Тип> -> ', '3 & <и о> -> ', '3 & | +| | <тип> -> ', '2 & <ис> -> <не>'] | ++--------+---------------------------------------------------------------------+ +| С | ['104 & <1С> -> ', '8 & <1С> -> ', '8 & <ОС> -> ', '4 & | +| | <Со> -> ', '3 & <НДС> -> ', '2 & <ДС> -> ', '2 & <ЮС> | +| | -> <1О>', '1 & <С> -> <—>'] | ++--------+---------------------------------------------------------------------+ +| , | ['64 & <,> -> <.>', '6 & <ПО,> -> ', '1 & <,> -> <;>'] | ++--------+---------------------------------------------------------------------+ +| . | ['3 & <.> -> <,>', '3 & <3.> -> < He>', '2 & <. 
> -> < ‘>', '2 & | +| | <г.> -> <Г>'] | ++--------+---------------------------------------------------------------------+ +| с | ['36 & <ст> -> ', '8 & <со> -> ', '4 & <см> -> ', '3 & | +| | <с 6> -> ', '3 & <ься> -> < by>', '2 & <ис> -> <не>', '1 & <с> | +| | -> ', '1 & <с> -> <©>', '1 & <с> -> <е>'] | ++--------+---------------------------------------------------------------------+ +| з | ['56 & <за> -> ', '34 & <из> -> '] | ++--------+---------------------------------------------------------------------+ +| О | ['20 & <ПО> -> ', '14 & <Об> -> ', '8 & <ДО> -> ', '8 & | +| | <ОС> -> ', '6 & <ПО,> -> ', '4 & <АО> -> ', '4 & <ЛО> | +| | -> ', '4 & <МО> -> ', '3 & <От > -> '] | ++--------+---------------------------------------------------------------------+ +| л | ['6 & <для> -> ', '6 & <мл> -> ', '3 & <для> -> <11>', '3 & | +| | <для> -> ', '3 & <л> -> <п>', '2 & <Эл> -> ', '2 & <ул> -> | +| | '] | ++--------+---------------------------------------------------------------------+ +| б | ['46 & <об> -> ', '14 & <Об> -> '] | ++--------+---------------------------------------------------------------------+ +| д | ['12 & <ед> -> ', '10 & <до> -> ', '6 & <для> -> ', '3 & | +| | <д> -> <л>', '3 & <для> -> <11>', '3 & <для> -> ', '1 & <д> -> | +| | <2>'] | ++--------+---------------------------------------------------------------------+ +| З | ['56 & <ФЗ> -> ', '4 & <За> -> ', '3 & <БЗ> -> <653>', '3 & | +| | <ТЗР> -> ', '2 & <ВЗ> -> <Ръ>'] | ++--------+---------------------------------------------------------------------+ +| в | ['12 & <во> -> ', '1 & <в> -> ', '1 & <в> -> <В>', '1 & <в> | +| | -> <п>'] | ++--------+---------------------------------------------------------------------+ +| Ф | ['56 & <ФЗ> -> ', '12 & <РФ> -> ', '2 & <ФД> -> '] | ++--------+---------------------------------------------------------------------+ +| м | ['12 & <им> -> ', '12 & <мг> -> ', '6 & <мл> -> ', '4 & | +| | <см> -> ', '2 & <мм> -> ', '2 & <мы> -> '] | 
++--------+---------------------------------------------------------------------+ +| г | ['12 & <мг> -> ', '2 & <г.> -> <Г>', '2 & <г> -> <т >', '2 & | +| | <г> -> <т>', '2 & <го> -> ', '2 & <гр> -> ', '2 & <гр> -> | +| | <тв>', '1 & <г> -> '] | ++--------+---------------------------------------------------------------------+ +| р | ['3 & <пер> -> ', '2 & <гр> -> ', '2 & <гр> -> <тв>', '2 & | +| | <ра> -> ', '2 & <рт> -> ', '2 & <ры> -> '] | ++--------+---------------------------------------------------------------------+ +| П | ['20 & <ПО> -> ', '6 & <ПО,> -> ', '4 & <По> -> ', '1 | +| | & <П> -> <И>'] | ++--------+---------------------------------------------------------------------+ +| Н | ['6 & <Н> -> <* П>', '6 & <На> -> ', '4 & <Не> -> ', '3 & | +| | <Н> -> <И>', '3 & <НДС> -> ', '2 & <ЕН> -> <ек>', '2 & <НБ> -> | +| | ', '1 & <Н> -> <П>'] | ++--------+---------------------------------------------------------------------+ +| 2 | ['4 & <28> -> ', '2 & <28> -> <Я >'] | ++--------+---------------------------------------------------------------------+ +| N | ['22 & -> <М>', '2 & -> <№>'] | ++--------+---------------------------------------------------------------------+ +| E | ['45 & -> <ЕВР>', '3 & -> <ЕКР>'] | ++--------+---------------------------------------------------------------------+ +| А | ['4 & <АО> -> ', '2 & <АД> -> '] | ++--------+---------------------------------------------------------------------+ +| a | ['6 & -> ', '2 & -> ', '2 & <нa> -> ', '1 & | +| | -> <а>'] | ++--------+---------------------------------------------------------------------+ +| И | ['4 & <ИБ> -> ', '2 & <ИТ> -> ', '1 & <И> -> <Н>'] | ++--------+---------------------------------------------------------------------+ +| я | ['6 & <для> -> ', '3 & <для> -> <11>', '3 & <для> -> ', '3 & | +| | <ься> -> < by>'] | ++--------+---------------------------------------------------------------------+ +| 3 | ['3 & <3.> -> < He>', '2 & <31> -> ', '1 & <3> -> '] | 
++--------+---------------------------------------------------------------------+ +| P | ['45 & -> <ЕВР>', '3 & -> <ЕКР>'] | ++--------+---------------------------------------------------------------------+ +| R | ['45 & -> <ЕВР>', '3 & -> <ЕКР>', '3 & -> <ОСК>'] | ++--------+---------------------------------------------------------------------+ +| Д | ['12 & <БД> -> ', '8 & <ДО> -> ', '4 & <ЕД> -> ', '3 & | +| | <НДС> -> ', '2 & <АД> -> ', '2 & <ДС> -> ', '2 & <ФД> | +| | -> ', '1 & <Д> -> <З>'] | ++--------+---------------------------------------------------------------------+ +| e | ['2 & -> '] | ++--------+---------------------------------------------------------------------+ +| Е | ['4 & <ЕД> -> ', '2 & <ЕН> -> <ек>'] | ++--------+---------------------------------------------------------------------+ +| C | ['18 & <1C> -> ', '4 & <1C> -> ', '3 & -> <ОСК>', '2 | +| | & <1C> -> ', '2 & <1C> -> <С>', '2 & -> ', '2 & -> | +| | '] | ++--------+---------------------------------------------------------------------+ +| Р | ['12 & <РФ> -> ', '3 & <ТЗР> -> '] | ++--------+---------------------------------------------------------------------+ +| х | ['32 & <их> -> '] | ++--------+---------------------------------------------------------------------+ +| I | ['3 & -> <Ш>', '3 & -> <130>', '3 & -> ', '2 | +| | & -> ', '2 & -> <1>', '1 & -> <|>'] | ++--------+---------------------------------------------------------------------+ +| Б | ['12 & <БД> -> ', '4 & <ИБ> -> ', '3 & <БЗ> -> <653>', '2 & | +| | <НБ> -> '] | ++--------+---------------------------------------------------------------------+ +| Т | ['3 & <ТЗР> -> ', '3 & <Тип> -> ', '2 & <ИТ> -> ', '1 | +| | & <Т> -> <Г>'] | ++--------+---------------------------------------------------------------------+ +| 0 | ['3 & <608> -> '] | ++--------+---------------------------------------------------------------------+ +| М | ['4 & <МО> -> '] | ++--------+---------------------------------------------------------------------+ +| 
у | ['2 & <ул> -> '] | ++--------+---------------------------------------------------------------------+ +| 6 | ['3 & <608> -> ', '3 & <с 6> -> '] | ++--------+---------------------------------------------------------------------+ +| Л | ['4 & <ЛО> -> '] | ++--------+---------------------------------------------------------------------+ +| ь | ['3 & <ься> -> < by>'] | ++--------+---------------------------------------------------------------------+ +| - | ['1 & <-> -> <—>'] | ++--------+---------------------------------------------------------------------+ +| u | ['3 & -> '] | ++--------+---------------------------------------------------------------------+ +| ; | ['9 & <;> -> <:>'] | ++--------+---------------------------------------------------------------------+ +| В | ['2 & <ВЗ> -> <Ръ>'] | ++--------+---------------------------------------------------------------------+ +| ы | ['2 & <мы> -> ', '2 & <ры> -> '] | ++--------+---------------------------------------------------------------------+ +| c | ['1 & -> <с>'] | ++--------+---------------------------------------------------------------------+ +| p | ['2 & -> '] | ++--------+---------------------------------------------------------------------+ +| ц | ['1 & <ц> -> <щ>'] | ++--------+---------------------------------------------------------------------+ +| 5 | ['2 & <75> -> <#2>'] | ++--------+---------------------------------------------------------------------+ +| 8 | ['4 & <28> -> ', '3 & <608> -> ', '2 & <28> -> <Я >'] | ++--------+---------------------------------------------------------------------+ +| O | ['3 & -> <130>', '3 & -> <ОСК>', '2 & -> '] | ++--------+---------------------------------------------------------------------+ +| S | ['3 & -> <130>'] | ++--------+---------------------------------------------------------------------+ +| ч | ['1 & <ч> -> <з>'] | ++--------+---------------------------------------------------------------------+ +| K | ['3 & -> <КНМ>'] | 
++--------+---------------------------------------------------------------------+ +| d | ['2 & -> ', '1 & -> <4>'] | ++--------+---------------------------------------------------------------------+ +| й | ['1 & <й> -> <:>'] | ++--------+---------------------------------------------------------------------+ +| 7 | ['2 & <75> -> <#2>'] | ++--------+---------------------------------------------------------------------+ +| H | ['6 & -> ', '3 & -> <КНМ>', '2 & -> '] | ++--------+---------------------------------------------------------------------+ +| V | ['3 & -> '] | ++--------+---------------------------------------------------------------------+ +| Ц | ['1 & <Ц> -> <Т>'] | ++--------+---------------------------------------------------------------------+ +| M | ['3 & -> <КНМ>'] | ++--------+---------------------------------------------------------------------+ +| № | ['6 & <№> -> '] | ++--------+---------------------------------------------------------------------+ +| G | ['2 & -> <С>'] | ++--------+---------------------------------------------------------------------+ +| | | ['1 & <|> -> <1>'] | ++--------+---------------------------------------------------------------------+ +| « | ['3 & <«_»> -> '] | ++--------+---------------------------------------------------------------------+ +| » | ['3 & <«_»> -> '] | ++--------+---------------------------------------------------------------------+ +| Э | ['2 & <Эл> -> '] | ++--------+---------------------------------------------------------------------+ +| Ю | ['2 & <ЮС> -> <1О>'] | ++--------+---------------------------------------------------------------------+ +| ж | ['2 & <же> -> '] | ++--------+---------------------------------------------------------------------+ +| X | ['2 & -> '] | ++--------+---------------------------------------------------------------------+ +| Y | ['1 & -> <У>'] | ++--------+---------------------------------------------------------------------+ +| _ | ['3 & <«_»> -> '] | 
++--------+---------------------------------------------------------------------+ +| — | ['1 & <—> -> <->'] | ++--------+---------------------------------------------------------------------+ \ No newline at end of file diff --git a/resources/train_dataset/img_classifier_dockerfile/README.md b/resources/train_dataset/img_classifier_dockerfile/README.md deleted file mode 100644 index 5fa1a2b1..00000000 --- a/resources/train_dataset/img_classifier_dockerfile/README.md +++ /dev/null @@ -1,21 +0,0 @@ -Предполагается что: - -* Вы только что распаковали архив с заданием (где и лежит этот файл) - -* Вы перешли в директорию, образовавшуюся после распаковки архива (все действия надо запускать из неё) - -* У вас установлен докер и вы знаете что это такое - -* Есть подключение к интернету - -Прочитайте манифест (manifest.pdf или manifest.md) - -Соберите контейнер для разметки и запустите его - -```bash -docker build -t labeling . - -docker run -ti --rm -p 5555:5555 labeling -``` - -Откройте localhost:5555 в браузере. \ No newline at end of file diff --git a/resources/train_dataset/img_classifier_dockerfile/run.sh b/resources/train_dataset/img_classifier_dockerfile/run.sh deleted file mode 100644 index 48cd1cc0..00000000 --- a/resources/train_dataset/img_classifier_dockerfile/run.sh +++ /dev/null @@ -1,21 +0,0 @@ -#Предполагается что: -# -#* Вы только что распаковали архив с заданием (где и лежит этот файл) -# -#* Вы перешли в директорию, образовавшуюся после распаковки архива (все действия надо запускать из неё) -# -#* У вас установлен докер и вы знаете что это такое -# -#* Есть подключение к интернету -# -#Прочитайте манифест (manifest.pdf или manifest.md) -# -#Соберите контейнер для разметки и запустите его -# -# -docker build -t labeling . - -docker run -ti --rm -p 5555:5555 labeling -#``` -# -#Откройте localhost:5555 в браузере. 
\ No newline at end of file diff --git a/dedoc/scripts/accsum b/scripts/accsum similarity index 100% rename from dedoc/scripts/accsum rename to scripts/accsum diff --git a/dedoc/scripts/accuracy b/scripts/accuracy similarity index 100% rename from dedoc/scripts/accuracy rename to scripts/accuracy diff --git a/dedoc/scripts/benchmark.py b/scripts/benchmark.py similarity index 91% rename from dedoc/scripts/benchmark.py rename to scripts/benchmark.py index 62987cbf..12c3f104 100644 --- a/dedoc/scripts/benchmark.py +++ b/scripts/benchmark.py @@ -12,7 +12,7 @@ from dedoc.utils.utils import send_file -path_result = os.path.join(os.path.dirname(__file__), "..", "..", "resources", "benchmarks") +path_result = os.path.join(os.path.dirname(__file__), "..", "resources", "benchmarks") path_result = os.path.abspath(path_result) os.makedirs(path_result, exist_ok=True) path_result = os.path.join(path_result, "time_benchmark.json") @@ -33,18 +33,18 @@ def get_cpu_performance() -> float: cpu_performance = get_cpu_performance() -print('"cpu_performance" = {}'.format(cpu_performance)) +print(f'"cpu_performance" = {cpu_performance}') with TemporaryDirectory() as path_base: path_out = os.path.join(path_base, "dataset.zip") wget.download(data_url, path_out) - with zipfile.ZipFile(path_out, 'r') as zip_ref: + with zipfile.ZipFile(path_out, "r") as zip_ref: zip_ref.extractall(path_base) print(path_base) failed = [] result = OrderedDict() - result["version"] = requests.get("{}/version".format(host)).text + result["version"] = requests.get(f"{host}/version").text result["cpu_performance"] = cpu_performance tasks = [ Task("images", "images", {}), @@ -90,5 +90,5 @@ def get_cpu_performance() -> float: with open(path_result, "w") as file_out: json.dump(obj=result, fp=file_out, indent=4, ensure_ascii=False) - print("save result in" + path_result) + print(f"save result in {path_result}") print(failed) diff --git a/dedoc/scripts/benchmark_pdf_attachments.py b/scripts/benchmark_pdf_attachments.py 
similarity index 99% rename from dedoc/scripts/benchmark_pdf_attachments.py rename to scripts/benchmark_pdf_attachments.py index 5b433117..411f1275 100644 --- a/dedoc/scripts/benchmark_pdf_attachments.py +++ b/scripts/benchmark_pdf_attachments.py @@ -127,7 +127,7 @@ def _get_attachment_name(attachment: AttachedFile, png_files: int, json_files: i pdf_attachments_extractor = PDFAttachmentsExtractor(config={}) benchmarks_dict["common"] = get_attachments(attachments_extractor=pdf_attachments_extractor, input_dir=in_dir, attachments_dir=common_out_dir) - json_out_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "resources", "benchmarks")) + json_out_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "resources", "benchmarks")) with open(os.path.join(json_out_dir, "benchmark_pdf_attachments.json"), "w") as f: json.dump(benchmarks_dict, f, ensure_ascii=False, indent=2) diff --git a/dedoc/scripts/benchmark_pdf_miner.py b/scripts/benchmark_pdf_miner.py similarity index 90% rename from dedoc/scripts/benchmark_pdf_miner.py rename to scripts/benchmark_pdf_miner.py index 3f77e887..7161541b 100644 --- a/dedoc/scripts/benchmark_pdf_miner.py +++ b/scripts/benchmark_pdf_miner.py @@ -11,7 +11,6 @@ from dedoc.config import get_config from dedoc.dedoc_manager import DedocManager - URL = "https://at.ispras.ru/owncloud/index.php/s/uImxYhliBHU8ei7/download" URL_GT = "https://at.ispras.ru/owncloud/index.php/s/SXsOTqxGaGO9wL9/download" @@ -25,10 +24,10 @@ wget.download(URL, pdfs_zip_path) wget.download(URL_GT, pdfs_zip_gt_path) - with zipfile.ZipFile(pdfs_zip_path, 'r') as zip_ref: + with zipfile.ZipFile(pdfs_zip_path, "r") as zip_ref: zip_ref.extractall(data_dir) os.remove(pdfs_zip_path) - with zipfile.ZipFile(pdfs_zip_gt_path, 'r') as zip_ref: + with zipfile.ZipFile(pdfs_zip_gt_path, "r") as zip_ref: zip_ref.extractall(data_dir) os.remove(pdfs_zip_gt_path) @@ -54,7 +53,7 @@ accuracy_path = Path(tmpdir) / "accuracy.txt" if accuracy_path.exists(): 
accuracy_path.unlink() - command = f"{accuracy_script_path} \"{gt_path}\" {tmp_ocr_path} >> {accuracy_path}" + command = f'{accuracy_script_path} "{gt_path}" {tmp_ocr_path} >> {accuracy_path}' os.system(command) with open(accuracy_path, "r") as f: @@ -65,7 +64,7 @@ acc_percent = re.findall(r"\d+\.\d+", matched[0])[0][:-1] info[str(file)] = acc_percent - output_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "resources", "benchmarks")) + output_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "resources", "benchmarks")) with (Path(output_dir) / "benchmark_pdf_miner.json").open("w") as f: json.dump(info, f, ensure_ascii=False, indent=2) diff --git a/scripts/benchmark_table/benchmark_table.py b/scripts/benchmark_table/benchmark_table.py new file mode 100644 index 00000000..8a52048d --- /dev/null +++ b/scripts/benchmark_table/benchmark_table.py @@ -0,0 +1,164 @@ +import json +import pprint +import zipfile +from pathlib import Path +from typing import List, Optional + +import numpy as np +import wget + +from dedoc.api.api_utils import table2html +from dedoc.config import get_config +from dedoc.readers import PdfImageReader +from dedoc.readers.pdf_reader.pdf_image_reader.table_recognizer.table_recognizer import TableRecognizer +from scripts.benchmark_table.metric import TEDS + +path_result = Path(__file__).parent / ".." / ".." 
def download_dataset(data_dir: Path, name_zip: str, url: str) -> None:
    """
    Download a zip archive with benchmark data and unpack it into `data_dir`.

    If `data_dir` already exists, it is treated as a cache: nothing is downloaded and the function returns at once.
    Otherwise the directory is created, the archive is saved there as `name_zip`, extracted and then deleted.

    :param data_dir: directory where the unpacked benchmark data should be located
    :param name_zip: file name under which the archive is temporarily stored inside `data_dir`
    :param url: link for downloading the archive
    :raises Exception: any download or extraction error is re-raised after the partially filled directory is removed
    """
    import shutil

    if data_dir.exists():
        print(f"Use cached benchmark data from {data_dir}")
        return

    data_dir.mkdir(parents=True, exist_ok=True)
    pdfs_zip_path = data_dir / name_zip
    try:
        # pass the full file path: when only a directory is given, wget chooses the file name itself
        # and it is not guaranteed to be equal to name_zip
        wget.download(url, str(pdfs_zip_path))

        with zipfile.ZipFile(pdfs_zip_path, "r") as zip_ref:
            zip_ref.extractall(data_dir)
    except Exception:
        # the directory did not exist before this call, so it is safe to remove it;
        # otherwise the next run would take the half-filled directory for a valid cache
        shutil.rmtree(data_dir, ignore_errors=True)
        raise
    pdfs_zip_path.unlink()

    print(f"Benchmark data downloaded to {data_dir}")
def benchmark_on_generated_table() -> dict:
    """
    Run the table benchmark on synthetically generated tables and return the collected scores.

    The data was generated with https://github.com/hassan-mahmood/TIES_DataGeneration
    (the generation procedure is described in https://arxiv.org/pdf/1905.13391.pdf).
    Only the archive with the first table category is downloaded here, the generated data of all categories
    is available at https://at.ispras.ru/owncloud/index.php/s/cjpCIR7I0G4JzZU

    The ground truth HTML files are merged into one `common_gt.json` file inside the data directory,
    the predictions are saved to `pred.json` in the same directory.

    TODO: some tables have a low quality, the reason should be investigated.

    :return: dict with the keys "mode_metric_structure_only", "mean" (mean of the obtained scores) and "images" (scores returned by the metric)
    """

    data_dir = Path(get_config()["intermediate_data_path"]) / "visualizeimgs" / "category1"
    path_images = data_dir / "img_500"
    path_gt = data_dir / "html_500"
    download_dataset(data_dir,
                     name_zip="benchmark_table_data_generated_500_tables_category_1.zip",
                     url="https://at.ispras.ru/owncloud/index.php/s/gItWxupnF2pve6B/download")
    mode_metric_structure_only = True

    # make common ground-truth file: one entry per HTML file, the key is the name of the corresponding png image
    common_gt_json = {}
    for pathname in Path.iterdir(path_gt):
        image_name = pathname.name.split(".")[0] + ".png"
        with open(pathname, "r") as fp:
            table_html = fp.read()
            # exclude header tags
            # NOTE(review): as written, this call replaces "" with "" and leaves the text unchanged;
            # presumably the tag names of the header/body cells were lost here - confirm against the repository
            table_html = table_html.replace("", "")

        common_gt_json[image_name] = {"html": table_html}

    file_common_gt = data_dir / "common_gt.json"
    with file_common_gt.open("w") as fd:
        json.dump(common_gt_json, fd, indent=2, ensure_ascii=False)

    # calculate metrics
    path_pred = data_dir / "pred.json"

    pred_json = prediction(path_pred, path_images)
    scores = call_metric(pred_json=pred_json, true_json=common_gt_json, structure_only=mode_metric_structure_only, ignore_nodes=["span", "style", "head", "h4"])

    result = dict()
    result["mode_metric_structure_only"] = mode_metric_structure_only
    result["mean"] = np.mean([score for score in scores.values()])
    result["images"] = scores

    return result
class TableTree(Tree):
    """
    Node of an HTML table tree in the form that is passed to the APTED algorithm.

    Each node stores the tag name, the span attributes and the content of a cell (they are `None` for nodes
    that are created without these values), the visibility flag and the list of child nodes.
    """

    def __init__(self, tag: str, colspan=None, rowspan=None, content=None, visible=None, *children):  # noqa
        """
        :param tag: name of the HTML tag
        :param colspan: number of columns occupied by the cell
        :param rowspan: number of rows occupied by the cell
        :param content: content of the cell
        :param visible: whether the cell is visible
        :param children: child nodes of the node
        """
        self.tag = tag
        self.colspan = colspan
        self.rowspan = rowspan
        self.content = content
        self.visible = visible
        self.children = list(children)

    def bracket(self) -> str:
        """
        Show tree using brackets notation

        :return: string where each node with its subtree is enclosed in one pair of curly brackets
        """
        if self.tag in ("td", "th"):
            result = f'"tag": {self.tag}, "colspan": {self.colspan}, "rowspan": {self.rowspan}, "text": {self.content}'
        else:
            result = f'"tag": {self.tag}'
        result += "".join(child.bracket() for child in self.children)
        # one pair of brackets per node: doubled brackets would add an extra empty level around each node
        return "{" + result + "}"
class TEDS(object):
    """
    Tree Edit Distance based Similarity

    Both HTML tables are converted to trees of `TableTree` nodes, the edit distance between the trees is computed
    with APTED and normalized by the number of nodes of the larger table.
    """

    def __init__(self, structure_only: bool = False, n_jobs: int = 1, ignore_nodes: Optional[list] = None) -> None:
        """
        :param structure_only: if True, the content of cells is ignored and only the structure of tables is compared
        :param n_jobs: number of jobs, a positive integer (the value is only stored by this class)
        :param ignore_nodes: names of tags that are stripped from both tables before the comparison
        """
        assert isinstance(n_jobs, int) and (n_jobs >= 1), "n_jobs must be an integer greater than or equal to 1"
        self.structure_only = structure_only
        self.n_jobs = n_jobs
        self.ignore_nodes = ignore_nodes
        self.__tokens__ = []

    def tokenize(self, node: "html.HtmlElement") -> None:
        """
        Tokenizes table cells

        The tokens (opening tag, characters of the text, tokens of the children, closing tag, characters of the tail)
        are appended to `self.__tokens__`.

        :param node: HTML element to tokenize
        """
        self.__tokens__.append(f"<{node.tag}>")
        if node.text is not None:
            self.__tokens__ += list(node.text)
        for n in node.getchildren():
            self.tokenize(n)
        if node.tag != "unk":
            # closing tag token; load_html_tree relies on it when it cuts off the last token of a cell
            self.__tokens__.append(f"</{node.tag}>")
        if node.tag != "td" and node.tail is not None:
            self.__tokens__ += list(node.tail)

    def get_span(self, node: "html.HtmlElement", name_span: str) -> int:
        """
        Get the value of a span attribute of the cell.

        :param node: HTML element of the cell
        :param name_span: name of the attribute ("colspan" or "rowspan")
        :return: value of the attribute, 1 if the attribute is absent or its value is not positive
        :raises ValueError: if the value of the attribute is not an integer
        """
        value = int(node.attrib.get(name_span, "1"))
        return 1 if value <= 0 else value

    def load_html_tree(self, node: "html.HtmlElement", parent: Optional["TableTree"] = None) -> Optional["TableTree"]:
        """ Converts HTML tree to the format required by apted

        :param node: HTML element to convert (with all its descendants)
        :param parent: already converted parent node, the new node is appended to its children
        :return: the converted tree if `parent` is None (call for the root element), otherwise None
        """
        if node.tag == "td":
            if self.structure_only:
                cell = []
            else:
                self.__tokens__ = []
                self.tokenize(node)
                # the first and the last tokens are the tags of the cell itself, only its content is compared
                cell = self.__tokens__[1:-1].copy()

            try:
                new_node = TableTree(tag=node.tag,
                                     colspan=self.get_span(node, "colspan"),
                                     rowspan=self.get_span(node, "rowspan"),
                                     content=cell,
                                     visible=node.attrib.get("style") != "display: none", *deque())  # noqa
            except Exception as ex:
                print(f"Bad html file. HTML parse exception. Exception's msg: {ex}")
                raise
        else:
            new_node = TableTree(node.tag, None, None, None, True, *deque())
        if parent is not None:
            parent.children.append(new_node)
        if node.tag != "td":
            # children of cells are not converted: they are already included into the content of the cell
            for n in node.getchildren():
                self.load_html_tree(n, new_node)
        return new_node if parent is None else None

    def evaluate(self, pred: str, true: str) -> float:
        """ Computes TEDS score between the prediction and the ground truth of a given sample

        :param pred: HTML code of the predicted table
        :param true: HTML code of the ground truth table
        :return: the score, 0.0 if one of the codes is empty or has no table by the path "body/table"
        """
        if (not pred) or (not true):
            return 0.0
        parser = html.HTMLParser(remove_comments=True, encoding="utf-8")
        pred_tables = html.fromstring(pred, parser=parser).xpath("body/table")
        true_tables = html.fromstring(true, parser=parser).xpath("body/table")
        if not pred_tables or not true_tables:
            return 0.0

        pred_table, true_table = pred_tables[0], true_tables[0]
        if self.ignore_nodes:
            etree.strip_tags(pred_table, *self.ignore_nodes)
            etree.strip_tags(true_table, *self.ignore_nodes)
        n_nodes = max(len(pred_table.xpath(".//*")), len(true_table.xpath(".//*")))
        if n_nodes == 0:
            # both tables have no nested elements, so they are equal; without this check the division below fails
            return 1.0
        tree_pred = self.load_html_tree(pred_table)
        tree_true = self.load_html_tree(true_table)

        # the name differs from "distance" in order not to shadow the imported module
        edit_distance = APTED(tree_pred, tree_true, CustomConfig()).compute_edit_distance()
        return 1.0 - (float(edit_distance) / n_nodes)

    def batch_evaluate(self, pred_json: dict, true_json: dict) -> dict:
        """
        Computes TEDS score between the prediction and the ground truth of a batch of samples

        Files from `true_json` that have no prediction in `pred_json` get the score 0.0.

        :param pred_json: {'FILENAME': {'html': 'HTML CODE'}, ...}
        :param true_json: {'FILENAME': {'html': 'HTML CODE'}, ...}
        :return: {'FILENAME': 'TEDS SCORE', ...}
        """
        samples = true_json.keys()
        # an absent prediction is replaced by the empty code, evaluate returns 0.0 for it
        scores = [self.evaluate(pred_json.get(filename, {}).get("html", ""), true_json[filename]["html"]) for filename in tqdm(samples)]
        return dict(zip(samples, scores))
+1,3 @@ +# for metric TEDS: +apted==1.0.3 +distance==0.1.3 \ No newline at end of file diff --git a/dedoc/scripts/benchmark_tl_correctness.py b/scripts/benchmark_tl_correctness.py similarity index 89% rename from dedoc/scripts/benchmark_tl_correctness.py rename to scripts/benchmark_tl_correctness.py index ae379c78..d959a1f4 100644 --- a/dedoc/scripts/benchmark_tl_correctness.py +++ b/scripts/benchmark_tl_correctness.py @@ -5,17 +5,17 @@ import requests import wget -from dedoc.config import get_config from tqdm import tqdm +from dedoc.config import get_config from dedoc.utils.utils import send_file -path_result = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "resources", "benchmarks")) +path_result = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "resources", "benchmarks")) os.makedirs(path_result, exist_ok=True) path_result = os.path.join(path_result, "benchmarks_tl_correctness.json") host = "http://localhost:1231" -param_dist_errors = namedtuple('Param', ('total_file_size', 'total_incorrect_files', 'failed')) +param_dist_errors = namedtuple("Param", ("total_file_size", "total_incorrect_files", "failed")) def errors_param_for_text_layer(path_base: str, tl_type: str, tl_path: str, parameters: dict) -> namedtuple: @@ -49,7 +49,7 @@ def errors_param_for_text_layer(path_base: str, tl_type: str, tl_path: str, para if not os.path.isdir(benchmark_data_dir): path_out = os.path.join(data_dir, "data_with_text_layer.zip") wget.download("https://at.ispras.ru/owncloud/index.php/s/axacSYXf7YCLcbb/download", path_out) - with zipfile.ZipFile(path_out, 'r') as zip_ref: + with zipfile.ZipFile(path_out, "r") as zip_ref: zip_ref.extractall(data_dir) os.remove(path_out) print(f"Benchmark data downloaded to {benchmark_data_dir}") @@ -63,15 +63,15 @@ def errors_param_for_text_layer(path_base: str, tl_type: str, tl_path: str, para parameters = dict(pdf_with_text_layer="auto", pages="1:1") result_item = OrderedDict() - incorrect_tl_result = 
errors_param_for_text_layer(benchmark_data_dir, ' incorrect ', 'data_correct_text_layer', parameters) + incorrect_tl_result = errors_param_for_text_layer(benchmark_data_dir, " incorrect ", "data_correct_text_layer", parameters) result_item["percentage_of_guessed_correct_tl"] = 1 - incorrect_tl_result.total_incorrect_files / incorrect_tl_result.total_file_size result_item["list_of_file_with_incorrect_tl"] = incorrect_tl_result.failed - correct_tl_result = errors_param_for_text_layer(benchmark_data_dir, ' correct ', 'data_incorrect_text_layer', parameters) + correct_tl_result = errors_param_for_text_layer(benchmark_data_dir, " correct ", "data_incorrect_text_layer", parameters) result_item["percentage_of_guessed_incorrect_tl"] = 1 - correct_tl_result.total_incorrect_files / correct_tl_result.total_file_size result_item["list_of_file_with_correct_tl"] = correct_tl_result.failed result["guessing_the_correctness_of_the_text"] = result_item with open(path_result, "w") as file_out: json.dump(obj=result, fp=file_out, indent=4, ensure_ascii=False) - print("Save result in" + path_result) + print(f"Save result in {path_result}") diff --git a/dedoc/scripts/create_txtlayer_dataset.py b/scripts/create_txtlayer_dataset.py similarity index 84% rename from dedoc/scripts/create_txtlayer_dataset.py rename to scripts/create_txtlayer_dataset.py index 84fafa28..ca2e196c 100644 --- a/dedoc/scripts/create_txtlayer_dataset.py +++ b/scripts/create_txtlayer_dataset.py @@ -17,9 +17,9 @@ class CorrectTextGenerator: def __init__(self) -> None: - self.citation = re.compile(r'\[\d+]') - self.meta = re.compile(r'\[править \| править код]') - self.symbols = re.compile(r'[→←↑]') + self.citation = re.compile(r"\[\d+]") + self.meta = re.compile(r"\[править \| править код]") + self.symbols = re.compile(r"[→←↑]") self.title_url = "https://{lang}.wikipedia.org/w/api.php?origin=*&action=query&format=json&list=random&rnlimit=1&rnnamespace=0" self.article_url = 
"https://{lang}.wikipedia.org/w/api.php?origin=*&action=parse&format=json&page={title}&prop=text" @@ -37,15 +37,15 @@ def get_random_text(self, lang: str) -> str: # 2 - Get text the article article_result = requests.post(self.article_url.format(lang=lang, title=title)) article_result_dict = article_result.json() - article = article_result_dict["parse"]["text"]['*'] - bs = BeautifulSoup(article, 'html.parser') + article = article_result_dict["parse"]["text"]["*"] + bs = BeautifulSoup(article, "html.parser") article_text = bs.get_text() # 3 - Clear text of the article from unused symbols - article_text_fixed = re.sub(self.citation, '', article_text) + article_text_fixed = re.sub(self.citation, "", article_text) article_text_fixed = re.sub(self.meta, "", article_text_fixed) article_text_fixed = re.sub(self.symbols, "", article_text_fixed) - article_text_fixed = re.sub(r'\n+', "\n", article_text_fixed) + article_text_fixed = re.sub(r"\n+", "\n", article_text_fixed) except: # noqa article_text_fixed = "" @@ -62,18 +62,22 @@ class EncodingCorruptor(Corruptor): def __init__(self) -> None: self.encodings = { "en": { - "input": ['cp1026'], - "output": ['cp1256', 'cp437', 'cp775', 'cp852', 'cp855', 'cp857', 'cp860', 'cp861', 'cp862', 'cp863', 'cp866', 'gb18030', 'hp_roman8', - 'iso8859_10', 'iso8859_11', 'iso8859_13', 'iso8859_14', 'iso8859_16', 'iso8859_2', 'iso8859_4', 'iso8859_5', 'koi8_r', - 'mac_cyrillic', 'mac_greek', 'mac_latin2', 'mac_roman'] + "input": ["cp1026"], + "output": [ + "cp1256", "cp437", "cp775", "cp852", "cp855", "cp857", "cp860", "cp861", "cp862", "cp863", "cp866", "gb18030", "hp_roman8", + "iso8859_10", "iso8859_11", "iso8859_13", "iso8859_14", "iso8859_16", "iso8859_2", "iso8859_4", "iso8859_5", "koi8_r", + "mac_cyrillic", "mac_greek", "mac_latin2", "mac_roman" + ] }, "ru": { - "input": ['cp855', 'cp866', 'gb18030', 'iso8859_5', 'koi8_r', 'mac_cyrillic', 'utf_8'], - "output": ['cp1026', 'cp1256', 'cp437', 'cp775', 'cp850', 'cp852', 'cp863', 'cp866', 
'hp_roman8', 'iso8859_10', 'iso8859_11', - 'iso8859_13', 'iso8859_14', 'iso8859_15', 'iso8859_16', 'iso8859_2', 'iso8859_4', 'iso8859_5', 'iso8859_9', 'koi8_r', - 'mac_cyrillic', 'mac_greek', 'mac_latin2', 'mac_roman', 'cp1140', 'cp273', 'cp855', 'cp860', 'cp861', 'cp857', 'cp500', - 'cp862', 'gb18030'] + "input": ["cp855", "cp866", "gb18030", "iso8859_5", "koi8_r", "mac_cyrillic", "utf_8"], + "output": [ + "cp1026", "cp1256", "cp437", "cp775", "cp850", "cp852", "cp863", "cp866", "hp_roman8", "iso8859_10", "iso8859_11", + "iso8859_13", "iso8859_14", "iso8859_15", "iso8859_16", "iso8859_2", "iso8859_4", "iso8859_5", "iso8859_9", "koi8_r", + "mac_cyrillic", "mac_greek", "mac_latin2", "mac_roman", "cp1140", "cp273", "cp855", "cp860", "cp861", "cp857", "cp500", + "cp862", "gb18030" + ] } } @@ -115,7 +119,7 @@ def __init__(self) -> None: self.text_color = (0, 0, 0) self.background_color = (255, 255, 255) - font_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "resources", "Arial_Narrow.ttf")) + font_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "labeling", "resources", "Arial_Narrow.ttf")) self.font = ImageFont.truetype(font_path, self.font_size) def corrupt(self, text: str, lang: str) -> str: diff --git a/dedoc/scripts/calc_tesseract_benchmarks.py b/scripts/tesseract_benchmark/calc_tesseract_benchmarks.py similarity index 62% rename from dedoc/scripts/calc_tesseract_benchmarks.py rename to scripts/tesseract_benchmark/calc_tesseract_benchmarks.py index 47d58a8e..07895d0d 100644 --- a/dedoc/scripts/calc_tesseract_benchmarks.py +++ b/scripts/tesseract_benchmark/calc_tesseract_benchmarks.py @@ -1,7 +1,7 @@ import os import re +import time import zipfile -from tempfile import TemporaryDirectory from typing import Dict, List, Tuple import cv2 @@ -11,6 +11,14 @@ from texttable import Texttable from dedoc.config import get_config +from scripts.tesseract_benchmark.ocr_correction import correction, init_correction_step +from 
scripts.tesseract_benchmark.text_blob_correction import TextBlobCorrector + +WITHOUT_CORRECTION = "" +SAGE_CORRECTION = "_sage-correction" +TEXT_BLOB_CORRECTION = "_textblob-correction" + +USE_CORRECTION_OCR = TEXT_BLOB_CORRECTION def _call_tesseract(image: np.ndarray, language: str, psm: int = 3) -> str: @@ -69,14 +77,16 @@ def _get_avg(array: List) -> float: def _get_avg_by_dataset(statistics: Dict, dataset: str) -> List: - return [_get_avg(statistics[dataset]["ASCII_Spacing_Characters"]), - _get_avg(statistics[dataset]["ASCII_Special_Symbols"]), - _get_avg(statistics[dataset]["ASCII_Digits"]), - _get_avg(statistics[dataset]["ASCII_Uppercase_Letters"]), - _get_avg(statistics[dataset]["Latin1_Special_Symbols"]), - _get_avg(statistics[dataset]["Cyrillic"]), - sum(statistics[dataset]["Amount of words"]), - _get_avg(statistics[dataset]["Accuracy"])] + return [ + _get_avg(statistics[dataset]["ASCII_Spacing_Characters"]), + _get_avg(statistics[dataset]["ASCII_Special_Symbols"]), + _get_avg(statistics[dataset]["ASCII_Digits"]), + _get_avg(statistics[dataset]["ASCII_Uppercase_Letters"]), + _get_avg(statistics[dataset]["Latin1_Special_Symbols"]), + _get_avg(statistics[dataset]["Cyrillic"]), + sum(statistics[dataset]["Amount of words"]), + _get_avg(statistics[dataset]["Accuracy"]) + ] def __parse_symbol_info(lines: List[str]) -> Tuple[List, int]: @@ -98,7 +108,7 @@ def __parse_symbol_info(lines: List[str]) -> Tuple[List, int]: def __parse_ocr_errors(lines: List[str]) -> List: ocr_errors = [] matched_errors = [(line_num, line) for line_num, line in enumerate(lines) if "Errors Marked Correct-Generated" in line][0] - for num, line in enumerate(lines[matched_errors[0] + 1:]): + for line in lines[matched_errors[0] + 1:]: # example line: " 2 0 { 6}-{б}" errors = re.findall(r"(\d+)", line)[0] chars = re.findall(r"{(.*)}-{(.*)}", line)[0] @@ -109,7 +119,7 @@ def __parse_ocr_errors(lines: List[str]) -> List: def __get_summary_symbol_error(path_reports: str) -> Texttable: # 1 - call 
accsum for get summary of all reports - accuracy_script_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "accsum")) + accuracy_script_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "accsum")) if os.path.exists(f"{path_reports}/../accsum_report.txt"): os.remove(f"{path_reports}/../accsum_report.txt") @@ -118,7 +128,7 @@ def __get_summary_symbol_error(path_reports: str) -> Texttable: command = f"{accuracy_script_path} {file_reports} >> {path_reports}/../accsum_report.txt" os.system(command) - accsum_report_path = os.path.join(path_reports, "../accsum_report.txt") + accsum_report_path = os.path.join(path_reports, "..", "accsum_report.txt") # 2 - parse report info with open(accsum_report_path, "r") as f: @@ -150,8 +160,12 @@ def __get_summary_symbol_error(path_reports: str) -> Texttable: def __create_statistic_tables(statistics: dict, accuracy_values: List) -> Tuple[Texttable, Texttable]: accs = [["Dataset", "Image name", "--psm", "Amount of words", "Accuracy OCR"]] - accs_common = [["Dataset", "ASCII_Spacing_Chars", "ASCII_Special_Symbols", "ASCII_Digits", - "ASCII_Uppercase_Chars", "Latin1_Special_Symbols", "Cyrillic", "Amount of words", "AVG Accuracy"]] + accs_common = [ + [ + "Dataset", "ASCII_Spacing_Chars", "ASCII_Special_Symbols", "ASCII_Digits", "ASCII_Uppercase_Chars", "Latin1_Special_Symbols", "Cyrillic", + "Amount of words", "AVG Accuracy" + ] + ] table_accuracy_per_image = Texttable() accs.extend(accuracy_values) @@ -169,9 +183,28 @@ def __create_statistic_tables(statistics: dict, accuracy_values: List) -> Tuple[ return table_common, table_accuracy_per_image -def __calculate_ocr_reports(cache_dir_accuracy: str, benchmark_data_path: str) -> Tuple[Texttable, Texttable]: +def calculate_accuracy_script(tmp_gt_path: str, tmp_prediction_path: str, accuracy_path: str) -> None: + # calculation accuracy build for Ubuntu from source https://github.com/eddieantonio/ocreval + accuracy_script_path = 
os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "accuracy")) + command = f"{accuracy_script_path} {tmp_gt_path} {tmp_prediction_path} >> {accuracy_path}" + os.system(command) + + +def __calculate_ocr_reports(cache_dir_accuracy: str, benchmark_data_path: str, cache_dir: str) -> Tuple[Texttable, Texttable]: statistics = {} accuracy_values = [] + correction_times = [] + + result_dir = os.path.join(cache_dir, "result_ocr") + os.makedirs(result_dir, exist_ok=True) + + corrector, corrected_path = None, None + if USE_CORRECTION_OCR == SAGE_CORRECTION: + corrector, corrected_path = init_correction_step(cache_dir) + elif USE_CORRECTION_OCR == TEXT_BLOB_CORRECTION: + corrector = TextBlobCorrector() + corrected_path = os.path.join(cache_dir, "result_corrected") + os.makedirs(corrected_path, exist_ok=True) with zipfile.ZipFile(benchmark_data_path, "r") as arch_file: names_dirs = [member.filename for member in arch_file.infolist() if member.file_size > 0] @@ -191,41 +224,50 @@ def __calculate_ocr_reports(cache_dir_accuracy: str, benchmark_data_path: str) - gt_path = os.path.join(base_zip, dataset_name, "gts", f"{base_name}.txt") imgs_path = os.path.join(base_zip, dataset_name, "imgs", img_name) accuracy_path = os.path.join(cache_dir_accuracy, f"{dataset_name}_{base_name}_accuracy.txt") - - with TemporaryDirectory() as tmpdir: - tmp_gt_path = os.path.join(tmpdir, "tmp_gt.txt") - tmp_ocr_path = os.path.join(tmpdir, "tmp_ocr.txt") - - try: - with arch_file.open(gt_path) as gt_file, open(tmp_gt_path, "wb") as tmp_gt_file, open(tmp_ocr_path, "w") as tmp_ocr_file: - - gt_text = gt_file.read().decode("utf-8") - word_cnt = len(gt_text.split()) - - tmp_gt_file.write(gt_text.encode()) # extraction gt from zip - tmp_gt_file.flush() - - arch_file.extract(imgs_path, tmpdir) - image = cv2.imread(tmpdir + "/" + imgs_path) - - # call ocr - psm = 6 if dataset_name == "english-words" else 4 - text = _call_tesseract(image, "rus+eng", psm=psm) - tmp_ocr_file.write(text) - 
tmp_ocr_file.flush() - - # calculation accuracy build for Ubuntu from source https://github.com/eddieantonio/ocreval - accuracy_script_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "accuracy")) - command = f"{accuracy_script_path} {tmp_gt_path} {tmp_ocr_path} >> {accuracy_path}" - os.system(command) - - statistics = _update_statistics_by_dataset(statistics, dataset_name, accuracy_path, word_cnt) - accuracy_values.append([dataset_name, base_name, psm, word_cnt, statistics[dataset_name]["Accuracy"][-1]]) - - except Exception as ex: - print(ex) - print("If you have problems with libutf8proc.so.2, try the command: `apt install -y libutf8proc-dev`") - + if os.path.exists(accuracy_path): + os.remove(accuracy_path) + + tmp_gt_path = os.path.join(result_dir, f"{img_name}_gt.txt") + tmp_ocr_path = os.path.join(result_dir, f"{img_name}_ocr.txt") + + try: + with arch_file.open(gt_path) as gt_file, open(tmp_gt_path, "wb") as tmp_gt_file, open(tmp_ocr_path, "w") as tmp_ocr_file: + + gt_text = gt_file.read().decode("utf-8") + word_cnt = len(gt_text.split()) + + tmp_gt_file.write(gt_text.encode()) # extraction gt from zip + tmp_gt_file.close() + + arch_file.extract(imgs_path, result_dir) + image = cv2.imread(result_dir + "/" + imgs_path) + + # call ocr + psm = 6 if dataset_name == "english-words" else 4 + text = _call_tesseract(image, "rus+eng", psm=psm) + tmp_ocr_file.write(text) + tmp_ocr_file.close() + + # call correction step + time_b = time.time() + if USE_CORRECTION_OCR in (SAGE_CORRECTION, TEXT_BLOB_CORRECTION): + corrected_text = correction(corrector, text) if USE_CORRECTION_OCR == SAGE_CORRECTION else corrector.correct(text) + tmp_corrected_path = os.path.join(corrected_path, f"{img_name}_ocr.txt") + with open(tmp_corrected_path, "w") as tmp_corrected_file: + tmp_corrected_file.write(corrected_text) + calculate_accuracy_script(tmp_gt_path, tmp_corrected_path, accuracy_path) + else: + calculate_accuracy_script(tmp_gt_path, tmp_ocr_path, accuracy_path) + + 
correction_times.append(time.time() - time_b) + statistics = _update_statistics_by_dataset(statistics, dataset_name, accuracy_path, word_cnt) + accuracy_values.append([dataset_name, base_name, psm, word_cnt, statistics[dataset_name]["Accuracy"][-1]]) + + except Exception as ex: + print(ex) + print("If you have problems with libutf8proc.so.2, try the command: `apt install -y libutf8proc-dev`") + + print(f"Time mean correction ocr = {np.array(correction_times).mean()}") table_common, table_accuracy_per_image = __create_statistic_tables(statistics, accuracy_values) return table_common, table_accuracy_per_image @@ -240,22 +282,24 @@ def __calculate_ocr_reports(cache_dir_accuracy: str, benchmark_data_path: str) - benchmark_data_path = os.path.join(cache_dir, f"{base_zip}.zip") if not os.path.isfile(benchmark_data_path): - wget.download("https://at.ispras.ru/owncloud/index.php/s/HqKt53BWmR8nCVG/download", benchmark_data_path) + wget.download("https://at.ispras.ru/owncloud/index.php/s/wMyKioKInYITpYT/download", benchmark_data_path) print(f"Benchmark data downloaded to {benchmark_data_path}") else: print(f"Use cached benchmark data from {benchmark_data_path}") assert os.path.isfile(benchmark_data_path) - table_common, table_accuracy_per_image = __calculate_ocr_reports(cache_dir_accuracy, benchmark_data_path) + table_common, table_accuracy_per_image = __calculate_ocr_reports(cache_dir_accuracy, benchmark_data_path, cache_dir) table_errors = __get_summary_symbol_error(path_reports=cache_dir_accuracy) - with open(os.path.join(output_dir, "tesseract_benchmark.txt"), "w") as res_file: - res_file.write(f"Tesseract version is {pytesseract.get_tesseract_version()}\nTable 1 - Accuracy for each file\n") + with open(os.path.join(output_dir, f"tesseract_benchmark{USE_CORRECTION_OCR}.txt"), "w") as res_file: + res_file.write(f"Tesseract version is {pytesseract.get_tesseract_version()}\n") + res_file.write(f"Correction step: {USE_CORRECTION_OCR}\n") + res_file.write("\nTable 1 - 
Accuracy for each file\n") res_file.write(table_accuracy_per_image.draw()) - res_file.write(f"\n\nTable 2 - AVG by each type of symbols:\n") + res_file.write("\n\nTable 2 - AVG by each type of symbols:\n") res_file.write(table_common.draw()) - res_file.write(f"\n\nTable 3 -OCR error by symbol:\n") + res_file.write("\n\nTable 3 -OCR error by symbol:\n") res_file.write(table_errors.draw()) print(f"Tesseract version is {pytesseract.get_tesseract_version()}") diff --git a/scripts/tesseract_benchmark/ocr_correction.py b/scripts/tesseract_benchmark/ocr_correction.py new file mode 100644 index 00000000..89fb87a1 --- /dev/null +++ b/scripts/tesseract_benchmark/ocr_correction.py @@ -0,0 +1,41 @@ +import os +from typing import Tuple + +import torch +from sage.spelling_correction import AvailableCorrectors +from sage.spelling_correction import RuM2M100ModelForSpellingCorrection +from sage.spelling_correction.corrector import Corrector + +""" +Install sage library (for ocr correction step): +git clone https://github.com/ai-forever/sage.git +cd sage +pip install . +pip install -r requirements.txt + +Note: sage use 5.2 Gb GPU ...... 
+""" +USE_GPU = True + + +def correction(model: Corrector, ocr_text: str) -> str: + + corrected_lines = [] + for line in ocr_text.split("\n"): + corrected_lines.append(model.correct(line)[0]) + corrected_text = "\n".join(corrected_lines) + + return corrected_text + + +def init_correction_step(cache_dir: str) -> Tuple[Corrector, str]: + + corrected_path = os.path.join(cache_dir, "result_corrected") + os.makedirs(corrected_path, exist_ok=True) + corrector = RuM2M100ModelForSpellingCorrection.from_pretrained(AvailableCorrectors.m2m100_1B.value) # 4.49 Gb model (pytorch_model.bin) + if torch.cuda.is_available() and USE_GPU: + corrector.model.to(torch.device("cuda:0")) + print("use CUDA") + else: + print("use CPU") + return corrector, corrected_path diff --git a/scripts/tesseract_benchmark/requirements.txt b/scripts/tesseract_benchmark/requirements.txt new file mode 100644 index 00000000..5ef9a438 --- /dev/null +++ b/scripts/tesseract_benchmark/requirements.txt @@ -0,0 +1 @@ +textblob==0.17.1 \ No newline at end of file diff --git a/scripts/tesseract_benchmark/text_blob_correction.py b/scripts/tesseract_benchmark/text_blob_correction.py new file mode 100644 index 00000000..73e8d70e --- /dev/null +++ b/scripts/tesseract_benchmark/text_blob_correction.py @@ -0,0 +1,9 @@ +from textblob import TextBlob + + +class TextBlobCorrector: + def __init__(self) -> None: + return + + def correct(self, text: str) -> str: + return str(TextBlob(text).correct()) diff --git a/dedoc/scripts/test_words_bbox_extraction.py b/scripts/test_words_bbox_extraction.py similarity index 83% rename from dedoc/scripts/test_words_bbox_extraction.py rename to scripts/test_words_bbox_extraction.py index 888c3273..9dde8702 100644 --- a/dedoc/scripts/test_words_bbox_extraction.py +++ b/scripts/test_words_bbox_extraction.py @@ -28,8 +28,13 @@ def __extract_conf_annotation(self, anns_conf: List[dict], ann_bbox: dict, text: interval = e - b if interval > 0: confs.append(ann_conf["value"]) - 
debug.append({f"{ann_conf['value']}[{b}:{e}]": [ - interval, f"bbox:[{ann_bbox['start']}:{ann_bbox['end']}], {text[ann_bbox['start']:ann_bbox['end']]}"]}) + debug.append( + { + f"{ann_conf['value']}[{b}:{e}]": [ + interval, f"bbox:[{ann_bbox['start']}:{ann_bbox['end']}], {text[ann_bbox['start']:ann_bbox['end']]}" + ] + } + ) if DETAILED_DEBUG: print(debug) @@ -44,8 +49,13 @@ def __extract_texttype_annotation(self, anns_type: List[dict], ann_bbox: dict, t interval = e - b if interval > 0: text_type = ann_type["value"] - debug.append({f"{ann_type['value']}:{b}:{e}": [ - interval, f"bbox:[{ann_bbox['start']}:{ann_bbox['end']}], {text[ann_bbox['start']:ann_bbox['end']]}"]}) + debug.append( + { + f"{ann_type['value']}:{b}:{e}": [ + interval, f"bbox:[{ann_bbox['start']}:{ann_bbox['end']}], {text[ann_bbox['start']:ann_bbox['end']]}" + ] + } + ) if DETAILED_DEBUG: print(debug) @@ -67,8 +77,9 @@ def __get_words_annotation(self, structure: dict) -> List[BboxWithConfsType]: confs = self.__extract_conf_annotation(anns_conf, ann_bbox, node["text"]) text_type = self.__extract_texttype_annotation(anns_type, ann_bbox, node["text"]) - words_annotation.append(BboxWithConfsType(start=ann_bbox["start"], end=ann_bbox["end"], bbox=ann_bbox["value"], confs=confs, - text_type=text_type)) + words_annotation.append( + BboxWithConfsType(start=ann_bbox["start"], end=ann_bbox["end"], bbox=ann_bbox["value"], confs=confs, text_type=text_type) + ) stack.extend(node["subparagraphs"]) @@ -91,13 +102,13 @@ def __get_words_annotation_from_cell(self, table: dict) -> List[BboxWithConfsTyp return words_annotation def __normalize_font_thickness(self, image: np.ndarray) -> Tuple[float, int]: - FONT_SCALE = 6e-4 - THICKNESS_SCALE = 1e-3 + font_scale = 6e-4 + thickness_scale = 1e-3 height, width, _ = image.shape - font_scale = min(width, height) * FONT_SCALE - thickness = math.ceil(min(width, height) * THICKNESS_SCALE) + font = min(width, height) * font_scale + thickness = math.ceil(min(width, height) * 
thickness_scale) - return font_scale, thickness + return font, thickness def __rotate_coordinate(self, x: int, y: int, xc: float, yc: float, angle: float) -> Tuple[int, int]: rad = angle * math.pi / 180 @@ -123,8 +134,10 @@ def __draw_word_annotations(self, image: np.ndarray, word_annotations: List[Bbox cv2.rectangle(image, p1, p2, (0, 255, 0) if ann.text_type == "typewritten" else (255, 0, 0)) text = ",".join(ann.confs) if ann.confs != [] else "None" - cv2.putText(image, text, (int(bbox["x_top_left"] * bbox["page_width"]), int(bbox["y_top_left"] * bbox["page_height"])), - cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 255), thickness) + cv2.putText( + image, text, (int(bbox["x_top_left"] * bbox["page_width"]), int(bbox["y_top_left"] * bbox["page_height"])), cv2.FONT_HERSHEY_SIMPLEX, + font_scale, (0, 0, 255), thickness + ) return image def __draw_tables_words(self, tables: List[dict], image: np.ndarray) -> np.ndarray: @@ -135,9 +148,9 @@ def __draw_tables_words(self, tables: List[dict], image: np.ndarray) -> np.ndarr image = self.__draw_word_annotations(image, word_annotations, angle=table_angle) return image - def test_pdf_documents(self): + def test_pdf_documents(self) -> None: filename_parameters_outputdir = [ - ["pdf_with_text_layer/english_doc.pdf", dict(pdf_with_text_layer="true"), "pdfminer_reader"], + ["pdf_with_text_layer/english_doc.pdf", dict(pdf_with_text_layer="true"), "pdfminer_reader"], ["pdf_with_text_layer/english_doc.pdf", dict(pdf_with_text_layer="tabby"), "tabby_reader"] ] @@ -158,12 +171,13 @@ def test_pdf_documents(self): image = self.__draw_tables_words(tables, image) cv2.imwrite(os.path.join(output_path, f"{os.path.split(file_name)[1]}.png"), image) - def test_table_word_extraction(self): - output_path = os.path.join(self.output_path, 'tables') + def test_table_word_extraction(self) -> None: + output_path = os.path.join(self.output_path, "tables") os.makedirs(output_path, exist_ok=True) - file_names = ["tables/example_with_table5.png", 
"tables/example_with_table3.png", "tables/example_with_table4.jpg", - "tables/example_with_table6.png", "tables/example_with_table_horizontal_union.jpg", - "scanned/orient_1.png", "tables/rotated_table.png"] + file_names = [ + "tables/example_with_table5.png", "tables/example_with_table3.png", "tables/example_with_table4.jpg", "tables/example_with_table6.png", + "tables/example_with_table_horizontal_union.jpg", "scanned/orient_1.png", "tables/rotated_table.png" + ] for file_name in file_names: result = self._send_request(file_name, data=dict()) @@ -182,7 +196,7 @@ def test_table_word_extraction(self): if len(tables) > 0: image = self.__draw_tables_words(tables, image) - cv2.imwrite(os.path.join(output_path, file_name.split('/')[-1]), image) + cv2.imwrite(os.path.join(output_path, file_name.split("/")[-1]), image) def test_document_table_split_last_column(self) -> None: filename_to_parameters = { diff --git a/dedoc/train_dataset/taskers/images_creators/concrete_creators/__init__.py b/scripts/train/__init__.py similarity index 100% rename from dedoc/train_dataset/taskers/images_creators/concrete_creators/__init__.py rename to scripts/train/__init__.py diff --git a/dedoc/scripts/train/train_acc_orientation_classifier.py b/scripts/train/train_acc_orientation_classifier.py similarity index 64% rename from dedoc/scripts/train/train_acc_orientation_classifier.py rename to scripts/train/train_acc_orientation_classifier.py index 825e5707..05f36083 100644 --- a/dedoc/scripts/train/train_acc_orientation_classifier.py +++ b/scripts/train/train_acc_orientation_classifier.py @@ -8,24 +8,20 @@ from torch import optim from torch.utils.data import DataLoader +from dedoc.config import get_config from dedoc.readers.pdf_reader.pdf_image_reader.columns_orientation_classifier.columns_orientation_classifier import ColumnsOrientationClassifier from dedoc.readers.pdf_reader.pdf_image_reader.columns_orientation_classifier.dataset_executor import DataLoaderImageOrient parser = 
argparse.ArgumentParser() -checkpoint_path_save = os.path.abspath(os.path.join(os.path.dirname(__file__), - "../../../resources/efficient_net_b0_fixed.pth")) -checkpoint_path_load = os.path.abspath(os.path.join(os.path.dirname(__file__), - "../../../resources/efficient_net_b0_fixed.pth")) -checkpoint_path = "../../../resources" +checkpoint_path_save = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "resources", "efficient_net_b0_fixed.pth")) +checkpoint_path_load = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "resources", "efficient_net_b0_fixed.pth")) +checkpoint_path = "../../resources" parser.add_argument("-t", "--train", type=bool, help="run for train model", default=False) -parser.add_argument("-s", "--checkpoint_save", help="Path to checkpoint for save or load", - default=checkpoint_path_save) -parser.add_argument("-l", "--checkpoint_load", help="Path to checkpoint for load", - default=checkpoint_path_load) +parser.add_argument("-s", "--checkpoint_save", help="Path to checkpoint for save or load", default=checkpoint_path_save) +parser.add_argument("-l", "--checkpoint_load", help="Path to checkpoint for load", default=checkpoint_path_load) parser.add_argument("-f", "--from_checkpoint", type=bool, help="run for train model", default=True) -parser.add_argument("-d", "--input_data_folder", help="Path to data with folders train or test", - default="/home/nasty/data/columns_orientation") +parser.add_argument("-d", "--input_data_folder", help="Path to data with folders train or test") args = parser.parse_args() BATCH_SIZE = 1 @@ -41,23 +37,20 @@ def accuracy_step(data_executor: DataLoaderImageOrient, net_executor: ColumnsOri """ net_executor.net.eval() testloader = data_executor.load_dataset( - csv_path=os.path.join(args.input_data_folder, 'test/labels.csv'), + csv_path=os.path.join(args.input_data_folder, "test/labels.csv"), image_path=args.input_data_folder, batch_size=BATCH_SIZE ) dataiter = iter(testloader) sample = 
dataiter.__next__() - _, orientation, columns = sample['image'], sample['orientation'], sample['columns'] + _, orientation, columns = sample["image"], sample["orientation"], sample["columns"] - print('GroundTruth: orientation {}, columns {}'.format(orientation, columns)) + print(f"GroundTruth: orientation {orientation}, columns {columns}") calc_accuracy_by_classes(testloader, data_executor.classes, net_executor, batch_size=1) -def calc_accuracy_by_classes(testloader: DataLoader, - classes: List, - classifier: ColumnsOrientationClassifier, - batch_size: int = 1) -> None: +def calc_accuracy_by_classes(testloader: DataLoader, classes: List, classifier: ColumnsOrientationClassifier, batch_size: int = 1) -> None: """ Function calculates accuracy ba each class :param testloader: DataLoader @@ -66,13 +59,13 @@ def calc_accuracy_by_classes(testloader: DataLoader, :param batch_size: size of batch :return: """ - class_correct = list(0. for i in range(len(classes))) - class_total = list(0. for i in range(len(classes))) + class_correct = list(0. for _ in range(len(classes))) + class_total = list(0. 
for _ in range(len(classes))) time_predict = 0 cnt_predict = 0 with torch.no_grad(): for data in testloader: - images, orientation, columns = data['image'], data['orientation'], data['columns'] + images, orientation, columns = data["image"], data["orientation"], data["columns"] time_begin = time() outputs = classifier.net(images.float().to(classifier.device)) @@ -97,21 +90,16 @@ def calc_accuracy_by_classes(testloader: DataLoader, class_correct[columns_i] += orientation_bool_predict class_total[columns_i] += 1 if not orientation_bool_predict or not columns_bool_predict: - print('{} predict as \norientation: {} \ncolumns: {}'.format(data['image_name'][i], - classes[2 + orientation_predicted[i]], - classes[columns_predicted[i]])) + print( # noqa + f'{data["image_name"][i]} predict as \norientation: {classes[2 + orientation_predicted[i]]} \ncolumns: {classes[columns_predicted[i]]}' + ) for i in range(len(classes)): - print('Accuracy of %5s : %2d %%' % ( - classes[i], 100 * class_correct[i] / class_total[i] if class_total[i] != 0 else 0)) - print('=== AVG Time predict {}'.format(time_predict / cnt_predict)) + print(f"Accuracy of {classes[i]:5s} : {100 * class_correct[i] / class_total[i] if class_total[i] != 0 else 0:2d} %") + print(f"=== AVG Time predict {time_predict / cnt_predict}") -def train_model(trainloader: DataLoader, - checkpoint_path_save: str, - classifier: ColumnsOrientationClassifier, - epoch_cnt: int = 7, - save_step: int = 500) -> None: +def train_model(trainloader: DataLoader, checkpoint_path_save: str, classifier: ColumnsOrientationClassifier, epoch_cnt: int = 7, save_step: int = 500) -> None: """ Function for train orientation classifier :param trainloader: DataLoader @@ -128,7 +116,7 @@ def train_model(trainloader: DataLoader, running_loss = 0.0 for i, data in enumerate(trainloader, 0): # get the inputs; data is a list of [inputs, labels] - inputs, orientation, columns = data['image'], data['orientation'], data['columns'] + inputs, orientation, 
columns = data["image"], data["orientation"], data["columns"] # zero the parameter gradients optimizer.zero_grad() @@ -136,17 +124,14 @@ def train_model(trainloader: DataLoader, # forward + backward + optimize outputs = classifier.net(inputs.float().to(classifier.device)) - loss = criterion(outputs[:, :2], - columns.to(classifier.device)) + criterion(outputs[:, 2:], - orientation.to(classifier.device)) + loss = criterion(outputs[:, :2], columns.to(classifier.device)) + criterion(outputs[:, 2:], orientation.to(classifier.device)) loss.backward() optimizer.step() running_loss += loss.item() # print statistics if i % 100 == 99: - print('[%d, %5d] loss: %.3f' % - (epoch + 1, i + 1, running_loss / 100)) + print(f"[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 100:.3f}") running_loss = 0.0 # save checkpoint @@ -154,14 +139,14 @@ def train_model(trainloader: DataLoader, classifier.save_weights(checkpoint_path_save) classifier.save_weights(checkpoint_path_save) - print('Finished Training') + print("Finished Training") def train_step(data_executor: DataLoaderImageOrient, classifier: ColumnsOrientationClassifier) -> None: classifier.net.train() # Part 1 - load datas trainloader = data_executor.load_dataset( - csv_path=os.path.join(args.input_data_folder, 'train/labels.csv'), + csv_path=os.path.join(args.input_data_folder, "train/labels.csv"), image_path=args.input_data_folder, batch_size=BATCH_SIZE ) @@ -169,22 +154,20 @@ def train_step(data_executor: DataLoaderImageOrient, classifier: ColumnsOrientat # get some random training images dataiter = iter(trainloader) sample = dataiter.__next__() - _, orientation, columns = sample['image'], sample['orientation'], sample['columns'] + _, orientation, columns = sample["image"], sample["orientation"], sample["columns"] # print labels - print(' '.join('%5s' % data_executor.classes[orientation[j]] for j in range(BATCH_SIZE))) - print(' '.join('%5s' % data_executor.classes[columns[j]] for j in range(BATCH_SIZE))) + print(" 
".join(f"{data_executor.classes[orientation[j]]:5s}" for j in range(BATCH_SIZE))) + print(" ".join(f"{data_executor.classes[columns[j]]:5s}" for j in range(BATCH_SIZE))) # Part 2 - train model train_model(trainloader, args.checkpoint_save, classifier) if __name__ == "__main__": - from dedoc.config import _config as config + config = get_config() data_executor = DataLoaderImageOrient() - net = ColumnsOrientationClassifier(on_gpu=True, - checkpoint_path=checkpoint_path if not args.train else '', - config=config) + net = ColumnsOrientationClassifier(on_gpu=True, checkpoint_path=checkpoint_path if not args.train else "", config=config) if args.train: train_step(data_executor, net) else: diff --git a/dedoc/scripts/train/train_diploma_line_classifier.py b/scripts/train/train_diploma_line_classifier.py similarity index 73% rename from dedoc/scripts/train/train_diploma_line_classifier.py rename to scripts/train/train_diploma_line_classifier.py index 1cbf03a7..71a4c900 100644 --- a/dedoc/scripts/train/train_diploma_line_classifier.py +++ b/scripts/train/train_diploma_line_classifier.py @@ -4,7 +4,7 @@ from dedoc.config import _config as config from dedoc.structure_extractors.feature_extractors.diploma_feature_extractor import DiplomaFeatureExtractor -from dedoc.train_dataset.trainer.xgboost_line_classifier_trainer import XGBoostLineClassifierTrainer +from scripts.train.trainers.xgboost_line_classifier_trainer import XGBoostLineClassifierTrainer def skip_labels(label: str) -> Optional[str]: @@ -17,16 +17,14 @@ def skip_labels(label: str) -> Optional[str]: classifier_name = "diploma_classifier" -clf_resources_path = os.path.join(os.path.expanduser('~'), ".cache", "dedoc", "resources", "line_type_classifiers") +clf_resources_path = os.path.join(os.path.expanduser("~"), ".cache", "dedoc", "resources", "line_type_classifiers") os.makedirs(clf_resources_path, exist_ok=True) -resources_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", "resources")) 
+resources_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "resources")) assert os.path.isdir(resources_path) -path_out = os.path.join(clf_resources_path, "{}.pkl.gz".format(classifier_name)) -path_scores = os.path.join(resources_path, "benchmarks", "{}_scores.json".format(classifier_name)) -path_feature_importances = os.path.join(resources_path, - "feature_importances", - "{}_feature_importances.xlsx".format(classifier_name)) +path_out = os.path.join(clf_resources_path, f"{classifier_name}.pkl.gz") +path_scores = os.path.join(resources_path, "benchmarks", f"{classifier_name}_scores.json") +path_feature_importances = os.path.join(resources_path, "feature_importances", f"{classifier_name}_feature_importances.xlsx") feature_extractor = DiplomaFeatureExtractor() classifier_parameters = dict(learning_rate=0.5, diff --git a/dedoc/scripts/train/train_law_line_classifier.py b/scripts/train/train_law_line_classifier.py similarity index 80% rename from dedoc/scripts/train/train_law_line_classifier.py rename to scripts/train/train_law_line_classifier.py index 9dc89167..7b6dd416 100644 --- a/dedoc/scripts/train/train_law_line_classifier.py +++ b/scripts/train/train_law_line_classifier.py @@ -4,8 +4,8 @@ from dedoc.config import _config as config from dedoc.structure_extractors.feature_extractors.law_text_features import LawTextFeatures -from dedoc.train_dataset.data_structures.line_with_label import LineWithLabel -from dedoc.train_dataset.trainer.xgboost_line_classifier_trainer import XGBoostLineClassifierTrainer +from scripts.train.trainers.xgboost_line_classifier_trainer import XGBoostLineClassifierTrainer +from train_dataset.data_structures.line_with_label import LineWithLabel def transform_labels(label: str) -> Optional[str]: @@ -29,14 +29,12 @@ def transform_labels(label: str) -> Optional[str]: txt_classifier = True -resources_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..", "..", "resources")) +resources_path = 
os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "resources")) assert os.path.isdir(resources_path) classifier_name = "law_txt_classifier" if txt_classifier else "law_classifier" -path_out = os.path.join(resources_path, "{}.pkl.gz".format(classifier_name)) -path_scores = os.path.join(resources_path, "benchmarks", "{}_scores.json".format(classifier_name)) -path_feature_importances = os.path.join(resources_path, - "feature_importances", - "{}_feature_importances.xlsx".format(classifier_name)) +path_out = os.path.join(resources_path, f"{classifier_name}.pkl.gz") +path_scores = os.path.join(resources_path, "benchmarks", f"{classifier_name}_scores.json") +path_feature_importances = os.path.join(resources_path, "feature_importances", f"{classifier_name}_feature_importances.xlsx") feature_extractor = LawTextFeatures(text_features_only=txt_classifier) classifier_parameters = dict(learning_rate=0.8, diff --git a/dedoc/scripts/train/train_mle_language_model.py b/scripts/train/train_mle_language_model.py similarity index 87% rename from dedoc/scripts/train/train_mle_language_model.py rename to scripts/train/train_mle_language_model.py index 1c64ec4b..8488f41a 100644 --- a/dedoc/scripts/train/train_mle_language_model.py +++ b/scripts/train/train_mle_language_model.py @@ -16,11 +16,11 @@ def tokenize_doc(text_layer: str) -> str: # converting to lowercase text_layer = text_layer.lower() # remove all the special characters - document = re.sub(r'\W', ' ', text_layer) + document = re.sub(r"\W", " ", text_layer) # remove all single characters - document = re.sub(r'\^[a-zA-Z]\s+', ' ', document) + document = re.sub(r"\^[a-zA-Z]\s+", " ", document) # substituting multiple spaces with single space - document = re.sub(r'\s+', ' ', document, flags=re.I) + document = re.sub(r"\s+", " ", document, flags=re.I) return document @@ -42,7 +42,7 @@ def main() -> None: print(files) for writer in file: try: - with open(path_big_data + files + '/' + writer) as f: + with 
open(path_big_data + files + "/" + writer) as f: text = f.read() document = tokenize_doc(text) documents.append(document) @@ -61,5 +61,5 @@ def main() -> None: pickle.dump(language_model_mle, f) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/dedoc/scripts/train/train_nn_line_classifier_law.py b/scripts/train/train_nn_line_classifier_law.py similarity index 74% rename from dedoc/scripts/train/train_nn_line_classifier_law.py rename to scripts/train/train_nn_line_classifier_law.py index 34ca5249..2aa6e0d5 100644 --- a/dedoc/scripts/train/train_nn_line_classifier_law.py +++ b/scripts/train/train_nn_line_classifier_law.py @@ -4,8 +4,8 @@ from dedoc.config import _config as config from dedoc.structure_extractors.feature_extractors.law_text_features import LawTextFeatures -from dedoc.train_dataset.data_structures.line_with_label import LineWithLabel -from dedoc.train_dataset.trainer.logreg_line_classifier_trainer import LogRegLineClassifierTrainer +from scripts.train.trainers.logreg_line_classifier_trainer import LogRegLineClassifierTrainer +from train_dataset.data_structures.line_with_label import LineWithLabel def transform_labels(label: str) -> Optional[str]: @@ -28,16 +28,14 @@ def transform_labels(label: str) -> Optional[str]: txt_classifier = True -resources_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..", "..", "resources")) +resources_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "resources")) assert os.path.isdir(resources_path) classifier_name = "law_txt_classifier" if txt_classifier else "law_classifier" -path_out = os.path.join(resources_path, "{}_nn.pkl.gz".format(classifier_name)) -path_feature_importances = os.path.join(resources_path, - "feature_importances", - "{}_feature_importances_nn.xlsx".format(classifier_name)) +path_out = os.path.join(resources_path, f"{classifier_name}_nn.pkl.gz") +path_feature_importances = os.path.join(resources_path, "feature_importances", 
f"{classifier_name}_feature_importances_nn.xlsx") feature_extractor = LawTextFeatures(text_features_only=txt_classifier) -path_scores = os.path.join(resources_path, "benchmarks", "{}_nn_scores.json".format(classifier_name)) +path_scores = os.path.join(resources_path, "benchmarks", f"{classifier_name}_nn_scores.json") classifier_parameters = dict(learning_rate=0.8, n_estimators=300, booster="gbtree", @@ -51,9 +49,7 @@ def get_sample_weight(line: LineWithLabel) -> float: text = line.line.lower().strip() regexps = LawTextFeatures.named_regexp application_regexp = LawTextFeatures.regexp_application_begin - regexp_weight = (50 - if any([regexp.match(text) for regexp in regexps]) or application_regexp.match(text.lower()) - else 1) + regexp_weight = (50 if any([regexp.match(text) for regexp in regexps]) or application_regexp.match(text.lower()) else 1) return regexp_weight * class_weight diff --git a/dedoc/scripts/train/train_paragraph_classifier.py b/scripts/train/train_paragraph_classifier.py similarity index 69% rename from dedoc/scripts/train/train_paragraph_classifier.py rename to scripts/train/train_paragraph_classifier.py index 9ae23ef1..0f2a6ba5 100644 --- a/dedoc/scripts/train/train_paragraph_classifier.py +++ b/scripts/train/train_paragraph_classifier.py @@ -4,7 +4,7 @@ from dedoc.config import _config as config from dedoc.readers.pdf_reader.pdf_image_reader.paragraph_extractor.paragraph_features import ParagraphFeatureExtractor -from dedoc.train_dataset.trainer.xgboost_line_classifier_trainer import XGBoostLineClassifierTrainer +from scripts.train.trainers.xgboost_line_classifier_trainer import XGBoostLineClassifierTrainer def skip_labels(label: str) -> Optional[str]: @@ -15,13 +15,11 @@ def skip_labels(label: str) -> Optional[str]: classifier_name = "paragraph_classifier" -resources_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..", "..", "resources")) +resources_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", 
"resources")) assert os.path.isdir(resources_path) -path_out = os.path.join(resources_path, "{}.pkl.gz".format(classifier_name)) -path_scores = os.path.join(resources_path, "benchmarks", "{}_scores.json".format(classifier_name)) -path_feature_importances = os.path.join(resources_path, - "feature_importances", - "{}_feature_importances.xlsx".format(classifier_name)) +path_out = os.path.join(resources_path, f"{classifier_name}.pkl.gz") +path_scores = os.path.join(resources_path, "benchmarks", f"{classifier_name}_scores.json") +path_feature_importances = os.path.join(resources_path, "feature_importances", f"{classifier_name}_feature_importances.xlsx") feature_extractor = ParagraphFeatureExtractor(config=config) @@ -48,4 +46,4 @@ def skip_labels(label: str) -> Optional[str]: ) trainer.fit(cross_val_only=False, save_errors_images=False) -print("successfully train {} classifier".format(classifier_name)) +print(f"successfully train {classifier_name} classifier") diff --git a/dedoc/scripts/train/train_txtlayer_classifier.py b/scripts/train/train_txtlayer_classifier.py similarity index 86% rename from dedoc/scripts/train/train_txtlayer_classifier.py rename to scripts/train/train_txtlayer_classifier.py index 152c11c9..ed3deaa4 100644 --- a/dedoc/scripts/train/train_txtlayer_classifier.py +++ b/scripts/train/train_txtlayer_classifier.py @@ -45,11 +45,11 @@ def get_texts_and_targets(self) -> Tuple[List[str], List[int]]: with open(path, mode="r") as f: text = f.read() except Exception as e: - print(f'Bad file {str(e)}: {path}') + print(f"Bad file {str(e)}: {path}") continue if len(text.strip()) == 0: - print(f'Empty file: {path}') + print(f"Empty file: {path}") continue texts.append(text) @@ -66,7 +66,7 @@ def get_texts_and_targets(self) -> Tuple[List[str], List[int]]: if not os.path.isdir(txtlayer_classifier_dataset_dir): path_out = os.path.join(data_dir, "data.zip") wget.download("https://at.ispras.ru/owncloud/index.php/s/z9WLFiKKFo2WMgW/download", path_out) - with 
zipfile.ZipFile(path_out, 'r') as zip_ref: + with zipfile.ZipFile(path_out, "r") as zip_ref: zip_ref.extractall(data_dir) os.remove(path_out) print(f"Dataset downloaded to {txtlayer_classifier_dataset_dir}") @@ -85,20 +85,16 @@ def get_texts_and_targets(self) -> Tuple[List[str], List[int]]: stages_data[stage] = dict(features=features, labels=labels) clf = XGBClassifier(random_state=42, learning_rate=0.5, n_estimators=600, booster="gbtree", tree_method="hist", max_depth=3) - clf.fit( - X=stages_data["train"]["features"], - y=stages_data["train"]["labels"], - eval_set=[(stages_data["val"]["features"], stages_data["val"]["labels"])], - ) + clf.fit(X=stages_data["train"]["features"], y=stages_data["train"]["labels"], eval_set=[(stages_data["val"]["features"], stages_data["val"]["labels"])]) test_preds = clf.predict(stages_data["test"]["features"]) score = f1_score(stages_data["test"]["labels"], test_preds) print(f"F1 score = {score}") - resources_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), "..", "..", "..", "resources") - with gzip.open(os.path.join(resources_dir, 'txtlayer_classifier.pkl.gz'), 'wb') as file: + resources_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), "..", "..", "resources") + with gzip.open(os.path.join(resources_dir, "txtlayer_classifier.pkl.gz"), "wb") as file: pickle.dump(clf, file) xgbfir.saveXgbFI(clf, feature_names=features.columns, - OutputXlsxFile=os.path.join(resources_dir, "feature_importances", 'txtlayer_classifier_feature_importances.xlsx')) + OutputXlsxFile=os.path.join(resources_dir, "feature_importances", "txtlayer_classifier_feature_importances.xlsx")) diff --git a/dedoc/scripts/train/train_tz_line_classifier.py b/scripts/train/train_tz_line_classifier.py similarity index 73% rename from dedoc/scripts/train/train_tz_line_classifier.py rename to scripts/train/train_tz_line_classifier.py index 6a1c33c5..da62ca7b 100644 --- a/dedoc/scripts/train/train_tz_line_classifier.py +++ 
b/scripts/train/train_tz_line_classifier.py @@ -4,7 +4,7 @@ from dedoc.config import _config as config from dedoc.structure_extractors.feature_extractors.tz_feature_extractor import TzTextFeatures -from dedoc.train_dataset.trainer.xgboost_line_classifier_trainer import XGBoostLineClassifierTrainer +from scripts.train.trainers.xgboost_line_classifier_trainer import XGBoostLineClassifierTrainer def skip_labels(label: str) -> Optional[str]: @@ -16,13 +16,11 @@ def skip_labels(label: str) -> Optional[str]: txt_classifier = True classifier_name = "tz_txt_classifier" if txt_classifier else "tz_classifier" -resources_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..", "..", "resources")) +resources_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "resources")) assert os.path.isdir(resources_path) -path_out = os.path.join(resources_path, "{}.pkl.gz".format(classifier_name)) -path_scores = os.path.join(resources_path, "benchmarks", "{}_scores.json".format(classifier_name)) -path_feature_importances = os.path.join(resources_path, - "feature_importances", - "{}_feature_importances.xlsx".format(classifier_name)) +path_out = os.path.join(resources_path, f"{classifier_name}.pkl.gz") +path_scores = os.path.join(resources_path, "benchmarks", f"{classifier_name}_scores.json") +path_feature_importances = os.path.join(resources_path, "feature_importances", f"{classifier_name}_feature_importances.xlsx") feature_extractor = TzTextFeatures(text_features_only=txt_classifier) classifier_parameters = dict(learning_rate=0.5, diff --git a/dedoc/train_dataset/trainer/__init__.py b/scripts/train/trainers/__init__.py similarity index 100% rename from dedoc/train_dataset/trainer/__init__.py rename to scripts/train/trainers/__init__.py diff --git a/dedoc/train_dataset/trainer/base_sklearn_line_classifier.py b/scripts/train/trainers/base_sklearn_line_classifier.py similarity index 59% rename from 
dedoc/train_dataset/trainer/base_sklearn_line_classifier.py rename to scripts/train/trainers/base_sklearn_line_classifier.py index c2c6b28c..0cdfe95d 100644 --- a/dedoc/train_dataset/trainer/base_sklearn_line_classifier.py +++ b/scripts/train/trainers/base_sklearn_line_classifier.py @@ -16,20 +16,28 @@ from xgboost import XGBClassifier from dedoc.structure_extractors.feature_extractors.abstract_extractor import AbstractFeatureExtractor -from dedoc.train_dataset.data_structures.line_with_label import LineWithLabel -from dedoc.train_dataset.trainer.data_loader import DataLoader -from dedoc.train_dataset.trainer.dataset import LineClassifierDataset -from dedoc.train_dataset.trainer.errors_saver import ErrorsSaver from dedoc.utils.utils import flatten, identity +from scripts.train.trainers.data_loader import DataLoader +from scripts.train.trainers.dataset import LineClassifierDataset +from scripts.train.trainers.errors_saver import ErrorsSaver +from train_dataset.data_structures.line_with_label import LineWithLabel class BaseClassifier(XGBClassifier): + """ + Base class for a classifier. + See documentation of `XGBClassifier `_ to get more details. + """ def __init__(self, **kwargs: Any) -> None: # noqa super().__init__(**kwargs) class BaseSklearnLineClassifierTrainer: - + """ + Base class for training :class:`~scripts.train.trainers.base_sklearn_line_classifier.BaseClassifier`. + It provides data loading and saving, classifier training and cross-validation. + During cross-validation, classifier errors are saved into `errors` directory inside `tmp_dir`. 
+ """ def __init__(self, data_url: str, logger: logging.Logger, @@ -46,7 +54,23 @@ def __init__(self, n_splits: int = 10, *, config: dict) -> None: - + """ + :param data_url: url to download training data for :class:`~scripts.train.trainers.data_loader.DataLoader` + :param logger: logger for logging details of classifier training + :param feature_extractor: feature extractor for making feature matrix from document lines + :param path_out: full path (with filename) to save a trained model + :param path_scores: full path to the JSON file to save the scores (accuracy) of a trained classifier (won't be saved if None) + :param path_features_importances: full path to the XLSX file to save information about most important features for classifier (won't be saved if None) + :param tmp_dir: path to the directory where to save downloaded training data and the classifier's errors, "/tmp" if None is provided + :param train_size: proportion of the training data, value can be in the diapason (0;1) or (0;100) + :param classifier_parameters: parameters for the classifier initialization + :param label_transformer: function for mapping initial data labels into the labels for classifier training, labels identity if None is provided + :param random_seed: seed for the classifier initialization + :param get_sample_weight: function for `sample_weight` calculating: LineWithLabel->weight [0,1] for setting line importance during classifier training, + LineWithLabel->1 if None is provided + :param n_splits: number of data splits for cross-validation + :param config: any custom configuration + """ self.data_url = data_url self.logger = logger self.feature_extractor = feature_extractor @@ -63,24 +87,29 @@ def __init__(self, self.classifier_parameters = {} if classifier_parameters is None else classifier_parameters self.path_scores = path_scores self.path_errors = os.path.join(self.tmp_dir, "errors") - self.errors_saver = ErrorsSaver(self.path_errors, self.dataset_dir, logger, config=config) + 
self.errors_saver = ErrorsSaver(self.path_errors, os.path.join(self.dataset_dir, "dataset.zip"), logger, config=config) self.path_features_importances = path_features_importances self.label_transformer = identity if label_transformer is None else label_transformer - if path_out.endswith(".pkl"): - path_out += ".gz" - elif path_out.endswith("pkl.gz"): - pass - else: - path_out += ".pkl.gz" + if not path_out.endswith(".pkl.gz"): + path_out = path_out + ".gz" if path_out.endswith(".pkl") else path_out + ".pkl.gz" + self.path_out = path_out self.config = config self.n_splits = n_splits - def fit(self, no_cache: bool = False, cross_val_only: bool = False, save: bool = False, save_errors_images: bool = False) -> None: + def fit(self, no_cache: bool = False, cross_val_only: bool = False, save_dataset: bool = False, save_errors_images: bool = False) -> None: + """ + Fit the line classifier with cross-validation and errors statistics saving. + + :param no_cache: whether to use cached training data (:class:`~scripts.train.trainers.data_loader.DataLoader`) + :param cross_val_only: whether to execute only cross-validation, without training and saving a resulting classifier + :param save_dataset: whether to save training dataset in form of a feature matrix (:class:`~scripts.train.trainers.dataset.LineClassifierDataset`) + :param save_errors_images: whether to visualize errors line classification (:class:`~scripts.train.trainers.errors_saver.ErrorsSaver`) + """ data = self.data_loader.get_data(no_cache=no_cache) - if save: - self.__save(data=data, path=self.tmp_dir) + if save_dataset: + self.__save_dataset_lines(data=data, path=self.tmp_dir) scores = self._cross_val(data, save_errors_images) logging.info(json.dumps(scores, indent=4)) if not cross_val_only: @@ -97,7 +126,7 @@ def fit(self, no_cache: bool = False, cross_val_only: bool = False, save: bool = predicted = cls.predict(features_test) accuracy = accuracy_score(labels_test, predicted, sample_weight=sample_weight[-n:]) - 
print("Final Accuracy = {}".format(accuracy)) # noqa + print(f"Final Accuracy = {accuracy}") scores["final_accuracy"] = accuracy if not os.path.isdir(os.path.dirname(self.path_out)): @@ -114,10 +143,24 @@ def fit(self, no_cache: bool = False, cross_val_only: bool = False, save: bool = os.makedirs(os.path.dirname(self.path_features_importances), exist_ok=True) self._save_features_importances(cls, features_train.columns) - def _save_features_importances(self, cls: Any, feature_names: List[str]) -> None: # noqa + def _save_features_importances(self, cls: BaseClassifier, feature_names: List[str]) -> None: + """ + Save information about most important features for classifier during training into a file with path `self.path_features_importances`. + + :param cls: classifier trained on the features with names `feature_names` + :param feature_names: column names of the feature matrix, that was used for classifier training + """ pass - def __save(self, data: List[List[LineWithLabel]], path: str = "/tmp", csv_only: bool = False) -> str: + def __save_dataset_lines(self, data: List[List[LineWithLabel]], path: str = "/tmp", csv_only: bool = False) -> str: + """ + Save training dataset in form of a feature matrix (:class:`~scripts.train.trainers.dataset.LineClassifierDataset`). + + :param data: list of documents, which are lists of lines with labels of the training dataset + :param path: path to the directory where the dataset will be saved + :param csv_only: whether to save only csv-file instead of saving dataset as a directory with csv, pkl and json files + :return: path to the directory where the dataset is saved + """ features_train = self.feature_extractor.fit_transform(data) features_list = sorted(features_train.columns) features_train = features_train[features_list] @@ -136,9 +179,21 @@ def __save(self, data: List[List[LineWithLabel]], path: str = "/tmp", csv_only: @abc.abstractmethod def _get_classifier(self) -> BaseClassifier: + """ + Initialize the classifier. 
+ + :return: classifier instance for training + """ pass def _cross_val(self, data: List[List[LineWithLabel]], save_errors_images: bool) -> dict: + """ + Cross-validate the classifier and save its errors on validation data + + :param data: list of documents, which are lists of lines with labels of the training dataset + :param save_errors_images: whether to visualize errors line classification (:class:`~scripts.train.trainers.errors_saver.ErrorsSaver`) + :return: dictionary with classifier scores during cross-validation + """ error_cnt = Counter() errors_uids = [] os.system(f"rm -rf {self.path_errors}/*") @@ -147,6 +202,7 @@ def _cross_val(self, data: List[List[LineWithLabel]], save_errors_images: bool) data = np.array(data, dtype=object) kf = KFold(n_splits=self.n_splits) + for train_index, val_index in tqdm(kf.split(data), total=self.n_splits): data_train, data_val = data[train_index].tolist(), data[val_index].tolist() labels_train = self.__get_labels(data_train) @@ -156,12 +212,14 @@ def _cross_val(self, data: List[List[LineWithLabel]], save_errors_images: bool) if features_train.shape[1] != features_val.shape[1]: val_minus_train = set(features_val.columns) - set(features_train.columns) train_minus_val = set(features_val.columns) - set(features_train.columns) - msg = f"some features in train, but not in val {val_minus_train}\nsome features in val, but not in train {train_minus_val}" + msg = f"Some features in train, but not in val {val_minus_train}\nsome features in val, but not in train {train_minus_val}" raise ValueError(msg) cls = self._get_classifier() sample_weight = [self.get_sample_weight(line) for line in flatten(data_train)] cls.fit(features_train, labels_train, sample_weight=sample_weight) labels_predict = cls.predict(features_val) + + # save classifier's errors on val set for y_pred, y_true, line in zip(labels_predict, labels_val, flatten(data_val)): if y_true != y_pred: error_cnt[(y_true, y_pred)] += 1 @@ -178,10 +236,14 @@ def _cross_val(self, data: 
List[List[LineWithLabel]], save_errors_images: bool) scores_dict = OrderedDict() scores_dict["mean"] = mean(scores) scores_dict["scores"] = scores - csv_path = self.__save(data=data.tolist(), path=self.dataset_dir, csv_only=True) + csv_path = self.__save_dataset_lines(data=data.tolist(), path=self.dataset_dir, csv_only=True) self.errors_saver.save_errors(error_cnt=error_cnt, errors_uids=list(set(errors_uids)), save_errors_images=save_errors_images, csv_path=csv_path) return scores_dict def __get_labels(self, data: List[List[LineWithLabel]]) -> List[str]: + """ + :param data: list of documents, which are lists of lines with labels of the training dataset + :return: a flattened list of lines' labels + """ result = [line.label for line in flatten(data)] return result diff --git a/dedoc/train_dataset/trainer/data_loader.py b/scripts/train/trainers/data_loader.py similarity index 74% rename from dedoc/train_dataset/trainer/data_loader.py rename to scripts/train/trainers/data_loader.py index 6be8d780..d21a38a9 100644 --- a/dedoc/train_dataset/trainer/data_loader.py +++ b/scripts/train/trainers/data_loader.py @@ -12,38 +12,53 @@ import wget from torch.utils.data import Dataset -from dedoc.train_dataset.data_structures.line_with_label import LineWithLabel -from dedoc.train_dataset.extractors.line_with_meta_extractor import LineWithMetaExtractor from dedoc.utils.utils import flatten +from train_dataset.data_structures.line_with_label import LineWithLabel +from train_dataset.extractors.line_with_meta_extractor import LineWithMetaExtractor class DataLoader: + """ + Class for downloading data from the cloud, distributing lines into document groups and sorting them. + Returns data in form of document lines with their labels. 
+ """ def __init__(self, dataset_dir: str, label_transformer: Callable[[str], str], logger: logging.Logger, data_url: str, *, config: dict) -> None: + """ + :param dataset_dir: path to the directory where to store downloaded dataset + :param label_transformer: function for mapping initial data labels into the labels for classifier training + :param logger: logger for logging details of dataset loading + :param data_url: url to download data from + :param config: any custom configuration + """ self.label_transformer = label_transformer self.dataset_dir = dataset_dir self.logger = logger self.data_url = data_url self.config = config - def __sort_data(self, documents: List[List[LineWithLabel]]) -> List[List[LineWithLabel]]: - for num, doc in enumerate(documents): - documents[num] = sorted(doc, key=lambda line: (line.metadata.page_id, line.metadata.line_id)) - return documents - def get_data(self, no_cache: bool = False) -> List[List[LineWithLabel]]: + """ + Download data from a cloud at `self.data_url` and sort document lines. 
+ + :param no_cache: whether to use cached data (if dataset is already downloaded) or download it anyway + :return: list of documents, which are lists of lines with labels of the training dataset + """ pkl_path = os.path.join(self.dataset_dir, "dataset.pkl.gz") + if os.path.isfile(pkl_path) and not no_cache: with gzip.open(pkl_path) as input_file: result = pickle.load(input_file) - print("func get_data(): Data were loaded from the local disk") # noqa + self.logger.info("Data were loaded from the local disk") return self.__sort_data(result) + os.makedirs(self.dataset_dir, exist_ok=True) path_out = os.path.join(self.dataset_dir, "dataset.zip") self.logger.info("Start download dataset") wget.download(self.data_url, path_out) self.logger.info("Finish download dataset") + # make list of LineWithLabel instead of dictionary of serialized lines with TemporaryDirectory() as tmp_dir: with zipfile.ZipFile(path_out, "r") as zip_ref: zip_ref.extractall(tmp_dir) @@ -55,6 +70,7 @@ def get_data(self, no_cache: bool = False) -> List[List[LineWithLabel]]: data = metadata_extractor.create_task() grouped = defaultdict(list) + # postprocess lines' labels and group them according to their document uid_set = set() for line in data: if line.uid in uid_set: @@ -70,6 +86,11 @@ def get_data(self, no_cache: bool = False) -> List[List[LineWithLabel]]: pickle.dump(result, gzip.open(pkl_path, "wb")) return self.__sort_data(result) + def __sort_data(self, documents: List[List[LineWithLabel]]) -> List[List[LineWithLabel]]: + for num, doc in enumerate(documents): + documents[num] = sorted(doc, key=lambda line: (line.metadata.page_id, line.metadata.line_id)) + return documents + class LineEpsDataSet(Dataset): def __init__(self, features: pd.DataFrame, labels: List[str], class_dict: dict, eps: int = 3) -> None: diff --git a/dedoc/train_dataset/trainer/dataset.py b/scripts/train/trainers/dataset.py similarity index 56% rename from dedoc/train_dataset/trainer/dataset.py rename to 
scripts/train/trainers/dataset.py index 75233a52..b93212bb 100644 --- a/dedoc/train_dataset/trainer/dataset.py +++ b/scripts/train/trainers/dataset.py @@ -7,15 +7,18 @@ class LineClassifierDataset: + """ + Dataset of lines in form of a feature matrix (with already extracted features) + """ def __init__(self, dataframe: pd.DataFrame, feature_list: List[str], group_name: str, label_name: str, text_name: str) -> None: """ - @param dataframe: pandas dataframe with features and metadata - @param feature_list: list of feature columns name - @param group_name: name of group column (for example "document") - @param label_name: name of label column (for example "label") + :param dataframe: pandas dataframe with features and metadata, columns are features (and metadata), rows are documents' lines + :param feature_list: list of feature columns' names + :param group_name: name of the group column, e.g., "document" (dataset is split into groups, one group of lines is related to one document) + :param label_name: name of the label column, e.g., "label" + :param text_name: name of the column with lines' text, e.g., "text" """ - super().__init__() self.label_name = label_name self.group_name = group_name self.feature_list = feature_list @@ -24,20 +27,33 @@ def __init__(self, dataframe: pd.DataFrame, feature_list: List[str], group_name: @property def features(self) -> pd.DataFrame: + """ + pandas dataframe with features, columns are features, rows are documents' lines + """ return self.dataframe[self.feature_list] @property def labels(self) -> pd.Series: + """ + a vector of labels for each line + """ return self.dataframe[self.label_name] def save(self, path: str = "/tmp", csv_only: bool = False) -> str: + """ + :param path: path to the directory where the dataset will be saved + :param csv_only: whether to save only csv-file instead of saving dataset as a directory with csv, pkl and json files + :return: path to the directory where the dataset is saved + """ if csv_only: 
self.dataframe.to_csv(os.path.join(path, "dataset.csv")) return path + dir_out = os.path.join(path, f"dataset_{int(time.time() * 1000)}") os.mkdir(dir_out) self.dataframe.to_csv(os.path.join(dir_out, "dataset.csv")) self.dataframe.to_pickle(os.path.join(dir_out, "dataset.pkl.gz")) + with open(os.path.join(dir_out, "description.json"), "w") as out: d = dict(label_name=self.label_name, group_name=self.group_name, text_name=self.text_name, feature_list=self.feature_list) json.dump(obj=d, fp=out, ensure_ascii=False, indent=4) diff --git a/dedoc/train_dataset/trainer/errors_saver.py b/scripts/train/trainers/errors_saver.py similarity index 62% rename from dedoc/train_dataset/trainer/errors_saver.py rename to scripts/train/trainers/errors_saver.py index 1d591a96..a3c643cb 100644 --- a/dedoc/train_dataset/trainer/errors_saver.py +++ b/scripts/train/trainers/errors_saver.py @@ -9,33 +9,58 @@ import pandas as pd from tqdm import tqdm -from dedoc.train_dataset.data_structures.images_archive import ImagesArchive -from dedoc.train_dataset.taskers.images_creators.concrete_creators.abstract_images_creator import AbstractImagesCreator -from dedoc.train_dataset.taskers.images_creators.concrete_creators.scanned_images_creator import ScannedImagesCreator -from dedoc.train_dataset.taskers.images_creators.concrete_creators.txt_images_creator import TxtImagesCreator +from train_dataset.data_structures.images_archive import ImagesArchive +from train_dataset.taskers.images_creators.concrete_creators.abstract_images_creator import AbstractImagesCreator +from train_dataset.taskers.images_creators.concrete_creators.docx_images_creator import DocxImagesCreator +from train_dataset.taskers.images_creators.concrete_creators.scanned_images_creator import ScannedImagesCreator +from train_dataset.taskers.images_creators.concrete_creators.txt_images_creator import TxtImagesCreator class ErrorsSaver: + """ + Class for saving line classifier's errors during training. 
+ Errors are saved in the directory with a path `errors_path` as a list of TXT files. + Each TXT file contains information about lines with similar errors (same substitution true_type -> predicted_type). + + In addition, images can be saved for misclassified lines visualization (misclassified lines are highlighted by a bounding box). + """ def __init__(self, errors_path: str, dataset_path: str, logger: logging.Logger, *, config: dict) -> None: + """ + :param errors_path: path to the directory where txt files with errors will be saved (and archive with images if needed) + :param dataset_path: path to the ZIP file with a training dataset downloaded by :class:`~scripts.train.trainers.data_loader.DataLoader` + :param logger: logger for logging details of errors saving + :param config: any custom configuration + """ self.logger = logger + self.config = config self.errors_path = errors_path - self.images_archive = os.path.join(dataset_path, "images.zip") + self.dataset_path = dataset_path + dataset_dir = os.path.dirname(dataset_path) + + # archive with images of all lines + self.images_archive = os.path.join(dataset_dir, "images.zip") + + # file with the list of processed document names + self.errors_documents_file = os.path.join(dataset_dir, "errors_documents.json") - # the name of document in the images archive with the list of processed document names - self.errors_documents = os.path.join(dataset_path, "errors_documents.json") + # archive with images of misclassified lines self.errors_images_archive = os.path.join(self.errors_path, "errors_images.zip") - self.dataset_path = os.path.join(dataset_path, "dataset.zip") - self.config = config def save_errors(self, error_cnt: Counter, errors_uids: List[str], csv_path: str, save_errors_images: bool = False) -> None: + """ + :param error_cnt: counter of label pairs (y_true, y_pred) where y_true != y_pred + :param errors_uids: list of lines' uids corresponding misclassified lines + :param csv_path: path to the csv-file with 
dataset in form of a feature matrix (:class:`~scripts.train.trainers.dataset.LineClassifierDataset`) + :param save_errors_images: whether to save images with highlighted misclassified lines, if True images will be saved in "errors_images.zip" + """ assert len(set(errors_uids)) == len(errors_uids) - self.logger.info(f"save errors in {self.errors_path}") + self.logger.info(f"Save errors in {self.errors_path}") errors_total_num = sum(error_cnt.values()) - print(f"{'true':16s} -> {'predicted':16s} {'cnt':6s} {'(percent)':16s}") # noqa + self.logger.info(f"{'true':16s} -> {'predicted':16s} {'cnt':6s} {'(percent)':16s}") for error, cnt in error_cnt.most_common(): y_true, y_pred = error - print(f"{y_true:16s} -> {y_pred:16s} {cnt:06,} ({100 * cnt / errors_total_num:02.2f}%)") # noqa + self.logger.info(f"{y_true:16s} -> {y_pred:16s} {cnt:06,} ({100 * cnt / errors_total_num:02.2f}%)") if save_errors_images: self.__save_images(errors_uids, csv_path) @@ -64,21 +89,24 @@ def save_errors(self, error_cnt: Counter, errors_uids: List[str], csv_path: str, os.remove(path_file) def __save_images(self, errors_uids: List[str], csv_path: str) -> None: - # 1) find image in the images archive, save to errors_images_archive if was found, else do the following: - # 2) find line uid and document name in the dataframe - # 3) find document in the dataset archive - # 4) add images with bboxes to images_archive - # 5) add images with bboxes from errors_uids to errors_images_archive + """ + 1 - find image in the images archive, save to errors_images_archive if was found, else do the following: + 2 - find line uid and document name in the dataframe + 3 - find document in the dataset archive + 4 - add images with bboxes to images_archive + 5 - add images with bboxes from errors_uids to errors_images_archive + """ csv_dataset_path = os.path.join(csv_path, "dataset.csv") if not os.path.isfile(self.dataset_path) or not os.path.isfile(csv_dataset_path): return + with tempfile.TemporaryDirectory() as 
documents_tmp_dir: with zipfile.ZipFile(self.dataset_path, "r") as dataset_archive: dataset_archive.extractall(documents_tmp_dir) path2docs = os.path.join(documents_tmp_dir, "original_documents") images_creators = [ ScannedImagesCreator(path2docs=path2docs), - # DocxImagesCreator(path2docs=path2docs, config=_config), + DocxImagesCreator(path2docs=path2docs, config=self.config), TxtImagesCreator(path2docs=path2docs, config=self.config) ] self.__group_data(os.path.join(documents_tmp_dir, "labeled.json")) @@ -100,6 +128,9 @@ def __process_uid(self, ready_documents: List[str], ready_images: List[str], uid: str) -> None: + """ + Add the image to the `errors_images_archive` with the given `uid` + """ done_set = set() document_name = filtered_dataset[filtered_dataset.uid == uid].head(1).group.item() img_name = f"{uid}.jpg" @@ -123,13 +154,17 @@ def __process_uid(self, with errors_images_archive.open(img_name, "w") as write_image: write_image.write(error_image) # save new list of ready documents - with open(self.errors_documents, "w") as json_file: + with open(self.errors_documents_file, "w") as json_file: json.dump(ready_documents, json_file) def __prepare_files(self) -> Tuple[List[str], List[str]]: - if not os.path.isfile(self.errors_documents): - with open(self.errors_documents, "w") as json_file: + """ + Initialize the list of documents whose pages was processed, and the list of page images with line bboxes + """ + if not os.path.isfile(self.errors_documents_file): + with open(self.errors_documents_file, "w") as json_file: json.dump([], json_file) + if not os.path.isfile(self.images_archive): with zipfile.ZipFile(self.images_archive, "w"): ready_images = [] @@ -137,11 +172,15 @@ def __prepare_files(self) -> Tuple[List[str], List[str]]: else: with zipfile.ZipFile(self.images_archive, "r") as images_archive: ready_images = images_archive.namelist() - with open(self.errors_documents, "r") as json_file: - ready_documents = json.load(json_file) + with 
open(self.errors_documents_file, "r") as json_file: + ready_documents = json.load(json_file) + return ready_documents, ready_images def __group_data(self, data_path: str) -> None: + """ + Group lines dicts into document groups + """ with open(data_path, "r") as f: data = json.load(f) result_dict = defaultdict(list) diff --git a/dedoc/train_dataset/trainer/line_lstm_classifier_trainer.py b/scripts/train/trainers/line_lstm_classifier_trainer.py similarity index 96% rename from dedoc/train_dataset/trainer/line_lstm_classifier_trainer.py rename to scripts/train/trainers/line_lstm_classifier_trainer.py index e34b8194..e3b49829 100644 --- a/dedoc/train_dataset/trainer/line_lstm_classifier_trainer.py +++ b/scripts/train/trainers/line_lstm_classifier_trainer.py @@ -19,9 +19,9 @@ from tqdm import tqdm from dedoc.structure_extractors.feature_extractors.abstract_extractor import AbstractFeatureExtractor -from dedoc.train_dataset.data_structures.line_with_label import LineWithLabel -from dedoc.train_dataset.trainer.data_loader import DataLoader, LineEpsDataSet from dedoc.utils.utils import flatten +from scripts.train.trainers.data_loader import DataLoader, LineEpsDataSet +from train_dataset.data_structures.line_with_label import LineWithLabel class Attention(nn.Module): @@ -151,10 +151,10 @@ def __init__(self, self.class_dict = class_dict self.num_classes = len(class_dict) if torch.cuda.is_available() and on_gpu: - print("Device is cuda") # noqa + print("Device is cuda") self.device = torch.device("cuda:0") else: - print("Device is cpu") # noqa + print("Device is cpu") self.device = torch.device("cpu") def __get_labels(self, data: List[List[LineWithLabel]]) -> List[str]: @@ -189,14 +189,14 @@ def training_and_evaluation_process(self, lstm_model: nn.Module, optimizer: Opti time_epoch = 0.0 for epoch in range(self.num_epochs): - print("\n\t Epoch: {}".format(epoch)) # noqa + print(f"\n\t Epoch: {epoch}") # The Dataloader class handles all the shuffles for you loader_iter = 
iter(LineEpsDataSet(features_train, labels_train, self.class_dict)) time_begin = time.time() train_loss, train_acc = self.train(lstm_model, loader_iter, len(labels_train), optimizer, criteria, batch_size=self.batch_size) time_epoch += time.time() - time_begin - print(f"\n\t \x1b\33[33mTrain: epoch: {epoch}| Train loss: {train_loss} | Train acc: {train_acc}\x1b[0m") # noqa + print(f"\n\t \x1b\33[33mTrain: epoch: {epoch}| Train loss: {train_loss} | Train acc: {train_acc}\x1b[0m") if file_log: file_log.write(f"\t Train: epoch: {epoch}| Train loss: {epoch} | Train acc: {train_loss}\n") @@ -204,7 +204,7 @@ def training_and_evaluation_process(self, lstm_model: nn.Module, optimizer: Opti if with_eval: loader_iter = iter(LineEpsDataSet(features_test, labels_test, self.class_dict)) test_loss, test_acc = self.evaluate(lstm_model, loader_iter, len(labels_test), criteria, batch_size=self.batch_size) - print(f"\n\t \x1b\33[92mEvaluation: Test loss: {test_loss} | Test acc: {test_acc}\x1b[0m") # noqa + print(f"\n\t \x1b\33[92mEvaluation: Test loss: {test_loss} | Test acc: {test_acc}\x1b[0m") if file_log: file_log.write(f"\t Eval: epoch: {epoch}| Test loss: {test_loss} | Test acc: {test_acc}\n") curr_loss = test_loss @@ -219,7 +219,7 @@ def training_and_evaluation_process(self, lstm_model: nn.Module, optimizer: Opti if with_save and curr_loss < best_loss: best_loss = curr_loss torch.save(lstm_model.state_dict(), self.path_out) - print(f"Model has been saved into {self.path_out}") # noqa + print(f"Model has been saved into {self.path_out}") return res_loss / self.num_epochs, res_acc / self.num_epochs, time_epoch / self.num_epochs @@ -237,7 +237,7 @@ def fit(self, with_cross_val: bool = True) -> None: data = np.array(data, dtype=object) if with_cross_val: - print("\n\x1b\33[95m---------Evaluation process (cross-validation) starts-------\x1b[0m\n") # noqa + print("\n\x1b\33[95m---------Evaluation process (cross-validation) starts-------\x1b[0m\n") kf = KFold(n_splits=self.n_splits) 
scores = [] epoch_time = [] @@ -267,7 +267,7 @@ def fit(self, with_cross_val: bool = True) -> None: scores_dict["scores"] = scores logfile_kfold_tmp.close() - print("\n\x1b\33[95m-------------------Train process starts------------------\x1b[0m\n") # noqa + print("\n\x1b\33[95m-------------------Train process starts------------------\x1b[0m\n") features_train, labels_train = self.get_features(data) lstm_model = LSTM(input_dim=features_train.shape[1], hidden_dim=features_train.shape[1], hidden_dim_2=lstm_hidden_dim, num_classes=self.num_classes, lstm_layers=lstm_layers, @@ -278,7 +278,7 @@ def fit(self, with_cross_val: bool = True) -> None: labels_test=None, file_log=None, with_save=True, with_eval=False) - print("\x1b\33[92mFinal Accuracy from training = {}\x1b[0m".format(acc)) # noqa + print(f"\x1b\33[92mFinal Accuracy from training = {acc}\x1b[0m") scores_dict["final_accuracy"] = acc if self.path_scores is not None: @@ -338,7 +338,7 @@ def train(self, model: nn.Module, iterator: Iterator, cnt_data: int, optimizer: epoch_acc += accuracy cnt += 1 if log_per_cnt != 0 and batch_num % log_per_cnt == 0: - print(f"\t\tbatch_num: {batch_num}, loss={epoch_loss / cnt}, acc={epoch_acc / cnt}") # noqa + print(f"\t\tbatch_num: {batch_num}, loss={epoch_loss / cnt}, acc={epoch_acc / cnt}") return epoch_loss / cnt, epoch_acc / cnt diff --git a/dedoc/train_dataset/trainer/logreg_line_classifier_trainer.py b/scripts/train/trainers/logreg_line_classifier_trainer.py similarity index 88% rename from dedoc/train_dataset/trainer/logreg_line_classifier_trainer.py rename to scripts/train/trainers/logreg_line_classifier_trainer.py index a98bc51d..375b0060 100644 --- a/dedoc/train_dataset/trainer/logreg_line_classifier_trainer.py +++ b/scripts/train/trainers/logreg_line_classifier_trainer.py @@ -4,8 +4,8 @@ from sklearn.linear_model import LogisticRegression from dedoc.structure_extractors.feature_extractors.abstract_extractor import AbstractFeatureExtractor -from 
dedoc.train_dataset.data_structures.line_with_label import LineWithLabel -from dedoc.train_dataset.trainer.base_sklearn_line_classifier import BaseSklearnLineClassifierTrainer +from scripts.train.trainers.base_sklearn_line_classifier import BaseSklearnLineClassifierTrainer +from train_dataset.data_structures.line_with_label import LineWithLabel class LogRegLineClassifierTrainer(BaseSklearnLineClassifierTrainer): diff --git a/scripts/train/trainers/xgboost_line_classifier_trainer.py b/scripts/train/trainers/xgboost_line_classifier_trainer.py new file mode 100644 index 00000000..7f71a122 --- /dev/null +++ b/scripts/train/trainers/xgboost_line_classifier_trainer.py @@ -0,0 +1,30 @@ +from typing import List + +import xgbfir +from xgboost import XGBClassifier + +from scripts.train.trainers.base_sklearn_line_classifier import BaseSklearnLineClassifierTrainer + + +class XGBoostLineClassifierTrainer(BaseSklearnLineClassifierTrainer): + """ + Trainer of XGBoost line classifier. + See documentation of `XGBClassifier `_ to get more details. + """ + + def _get_classifier(self) -> XGBClassifier: + """ + Initialize the XGBClassifier. + + :return: XGBClassifier instance for training + """ + return XGBClassifier(random_state=self.random_seed, **self.classifier_parameters) + + def _save_features_importances(self, cls: XGBClassifier, feature_names: List[str]) -> None: + """ + Save information about most important features for XGBClassifier using `xgbfir `_ library. 
+ + :param cls: XGBClassifier trained on the features with names `feature_names` + :param feature_names: column names of the feature matrix, that was used for classifier training + """ + xgbfir.saveXgbFI(cls, feature_names=feature_names, OutputXlsxFile=self.path_features_importances) diff --git a/tests/api_tests/test_api_doctype_law.py b/tests/api_tests/test_api_doctype_law.py index 52f6d7dd..eff24264 100644 --- a/tests/api_tests/test_api_doctype_law.py +++ b/tests/api_tests/test_api_doctype_law.py @@ -108,6 +108,7 @@ def test_law_odt(self) -> None: file_name = "ukrf.odt" self._check_ukrf(file_name) + @unittest.skip("TODO fix incorrect line classification because of bold text inside it (bold text was found after Статья 20.1.)") def test_law_article_multiline(self) -> None: file_name = "article_multiline.png" result = self._send_request(file_name, dict(document_type="law"), expected_code=200) diff --git a/tests/api_tests/test_api_format_html.py b/tests/api_tests/test_api_format_html.py index 8a28c357..07561afd 100644 --- a/tests/api_tests/test_api_format_html.py +++ b/tests/api_tests/test_api_format_html.py @@ -147,6 +147,15 @@ def test_html_with_styles_as_attribute(self) -> None: self.assertIn({"name": "bold", "value": "True", "start": 33, "end": 47}, annotations) self.assertIn({"name": "bold", "value": "True", "start": 0, "end": 15}, annotations) + def test_html_table_with_styles(self) -> None: + file_name = "table_with_styles.html" + result = self._send_request(file_name) + table = result["content"]["tables"][0] + self.assertIn({"start": 0, "end": 6, "name": "bold", "value": "True"}, table["cells"][1][0]["lines"][0]["annotations"]) + self.assertIn({"start": 0, "end": 10, "name": "italic", "value": "True"}, table["cells"][1][1]["lines"][0]["annotations"]) + self.assertIn({"start": 0, "end": 10, "name": "linked_text", "value": "some_text"}, table["cells"][2][0]["lines"][0]["annotations"]) + self.assertIn({"start": 0, "end": 16, "name": "strike", "value": "True"}, 
table["cells"][2][1]["lines"][0]["annotations"]) + def test_html_font_style_attribute(self) -> None: file_name = "210.html" self._send_request(file_name) diff --git a/tests/api_tests/test_api_format_pdf.py b/tests/api_tests/test_api_format_pdf.py index afd29131..a78026bd 100644 --- a/tests/api_tests/test_api_format_pdf.py +++ b/tests/api_tests/test_api_format_pdf.py @@ -170,3 +170,24 @@ def test_document_orientation(self) -> None: "0729.12.2014 № 168\n" '"БУРЫЙ МЕДВЕДЬ\n' "{вид охотничьих ресурсов)\n") + + def test_bold_annotation(self) -> None: + file_name = "bold_font.png" + result = self._send_request(file_name) + tree = result["content"]["structure"] + + node = tree["subparagraphs"][0] + bold_annotations = [annotation for annotation in node["annotations"] if annotation["name"] == "bold" and annotation["value"] == "True"] + self.assertEqual(len(bold_annotations), 2) + bold_annotations = sorted(bold_annotations, key=lambda x: x["start"]) + self.assertEqual((bold_annotations[0]["start"], bold_annotations[0]["end"]), (8, 12)) + self.assertEqual((bold_annotations[1]["start"], bold_annotations[1]["end"]), (29, 33)) + + node = tree["subparagraphs"][1] + bold_annotations = [annotation for annotation in node["annotations"] if annotation["name"] == "bold" and annotation["value"] == "True"] + self.assertEqual(len(bold_annotations), 0) + + node = tree["subparagraphs"][2] + bold_annotations = [annotation for annotation in node["annotations"] if annotation["name"] == "bold" and annotation["value"] == "True"] + self.assertEqual(len(bold_annotations), 1) + self.assertEqual((bold_annotations[0]["start"], bold_annotations[0]["end"]), (0, len(node["text"].strip()))) diff --git a/tests/data/htmls/table_with_styles.html b/tests/data/htmls/table_with_styles.html new file mode 100644 index 00000000..c3e3d028 --- /dev/null +++ b/tests/data/htmls/table_with_styles.html @@ -0,0 +1,18 @@ + + + + + Название тестового документа + + + + + + + + + + +
Пример таблицы со стилями
Первый столбецВторой столбец

Что-то

Что-то ещё
Ещё что-тоПоследняя ячейка
+ + \ No newline at end of file diff --git a/tests/data/scanned/bold_font.png b/tests/data/scanned/bold_font.png new file mode 100644 index 00000000..b51b9916 Binary files /dev/null and b/tests/data/scanned/bold_font.png differ diff --git a/tests/unit_tests/test_doctype_law_structure_extractor.py b/tests/unit_tests/test_doctype_law_structure_extractor.py index f68068e6..b701627f 100644 --- a/tests/unit_tests/test_doctype_law_structure_extractor.py +++ b/tests/unit_tests/test_doctype_law_structure_extractor.py @@ -8,8 +8,8 @@ from dedoc.data_structures.line_metadata import LineMetadata from dedoc.data_structures.line_with_meta import LineWithMeta from dedoc.structure_extractors.concrete_structure_extractors.law_structure_excractor import LawStructureExtractor -from dedoc.structure_extractors.feature_extractors.abstract_extractor import AbstractFeatureExtractor from dedoc.structure_extractors.hierarchy_level_builders.law_builders.body_builder.body_law_hierarchy_level_builder import BodyLawHierarchyLevelBuilder +from dedoc.structure_extractors.hierarchy_level_builders.utils_reg import regexps_ends_of_number from tests.test_utils import TestTimeout, get_test_config @@ -73,7 +73,7 @@ def test_number_ends(self) -> None: without_ends = ["1.2.2)", "1.4.5", "1.5.6.", "1.2.4.6}"] for num, number in enumerate(numbers): - res = AbstractFeatureExtractor.ends_of_number.search(number) + res = regexps_ends_of_number.search(number) self.assertEqual(number[:res.start()], without_ends[num]) def __get_line_with_meta(self, hierarchy_level: HierarchyLevel, text: str) -> LineWithMeta: diff --git a/tests/unit_tests/test_format_image_reader_bbox.py b/tests/unit_tests/test_format_image_reader_bbox.py index 497d11fc..4523a702 100644 --- a/tests/unit_tests/test_format_image_reader_bbox.py +++ b/tests/unit_tests/test_format_image_reader_bbox.py @@ -17,7 +17,8 @@ def test_line_order(self) -> None: page = self.reader.split_image2lines(image=image, page_num=1, is_one_column_document=True) bboxes 
= [bbox for bbox in page.bboxes if bbox.text.strip() != ""] for bbox in bboxes: - bbox.text = re.sub(r"\s+", " ", bbox.text) + for word in bbox.words: + word.text = re.sub(r"\s+", "", word.text) self.assertEqual("Утвержден", bboxes[0].text.strip()) self.assertEqual("приказом ФСТЭК России", bboxes[1].text.strip()) self.assertEqual("Утвержден", bboxes[0].text.strip()) diff --git a/tests/unit_tests/test_misc_hierarchy_level.py b/tests/unit_tests/test_misc_hierarchy_level.py index ab46a11b..f1625bba 100644 --- a/tests/unit_tests/test_misc_hierarchy_level.py +++ b/tests/unit_tests/test_misc_hierarchy_level.py @@ -12,9 +12,8 @@ def test_three_lines_equal_levels(self) -> None: self.assertTrue(h1 == h2) self.assertTrue(h1 >= h2) self.assertTrue(h1 <= h2) - self.assertTrue(h1 == h3) + self.assertFalse(h1 == h3) self.assertTrue(h1 >= h3) - self.assertTrue(h1 <= h3) def test_raw_text_greater_than_any_other(self) -> None: list_item = HierarchyLevel(level_1=2, level_2=1, can_be_multiline=False, line_type=HierarchyLevel.list_item) diff --git a/tests/unit_tests/test_misc_tasker.py b/tests/unit_tests/test_misc_tasker.py deleted file mode 100644 index 8fd3c66c..00000000 --- a/tests/unit_tests/test_misc_tasker.py +++ /dev/null @@ -1,166 +0,0 @@ -import json -import os -import tempfile -import unittest -import zipfile -from zipfile import ZipFile - -from PIL import Image - -from dedoc.attachments_handler.attachments_handler import AttachmentsHandler -from dedoc.converters.converter_composition import ConverterComposition -from dedoc.dedoc_manager import DedocManager -from dedoc.metadata_extractors.concrete_metadata_extractors.base_metadata_extractor import BaseMetadataExtractor -from dedoc.metadata_extractors.metadata_extractor_composition import MetadataExtractorComposition -from dedoc.readers.docx_reader.docx_reader import DocxReader -from dedoc.readers.reader_composition import ReaderComposition -from dedoc.readers.txt_reader.raw_text_reader import RawTextReader -from 
dedoc.structure_constructors.concrete_structure_constructors.tree_constructor import TreeConstructor -from dedoc.structure_constructors.structure_constructor_composition import StructureConstructorComposition -from dedoc.structure_extractors.concrete_structure_extractors.classifying_law_structure_extractor import ClassifyingLawStructureExtractor -from dedoc.structure_extractors.concrete_structure_extractors.default_structure_extractor import DefaultStructureExtractor -from dedoc.structure_extractors.concrete_structure_extractors.foiv_law_structure_extractor import FoivLawStructureExtractor -from dedoc.structure_extractors.concrete_structure_extractors.law_structure_excractor import LawStructureExtractor -from dedoc.structure_extractors.structure_extractor_composition import StructureExtractorComposition -from dedoc.train_dataset.taskers.concrete_taskers.line_label_tasker import LineLabelTasker -from dedoc.train_dataset.taskers.images_creators.concrete_creators.docx_images_creator import DocxImagesCreator -from dedoc.train_dataset.taskers.images_creators.concrete_creators.txt_images_creator import TxtImagesCreator -from dedoc.train_dataset.taskers.tasker import Tasker -from dedoc.train_dataset.train_dataset_utils import get_path_original_documents -from tests.test_utils import get_test_config - - -class TestTasker(unittest.TestCase): - base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "data", "taskers")) - path2bboxes = os.path.join(base_path, "bboxes.jsonlines") - path2lines = os.path.join(base_path, "lines.jsonlines") - path2docs = os.path.join(base_path, "images") - manifest_path = os.path.join(base_path, "test_manifest.md") - config_path = os.path.join(base_path, "test_config.json") - - def test_paths(self) -> None: - self.assertTrue(os.path.isfile(self.path2bboxes), self.path2bboxes) - self.assertTrue(os.path.isfile(self.path2lines), self.path2lines) - self.assertTrue(os.path.isfile(self.manifest_path), self.manifest_path) - 
self.assertTrue(os.path.isfile(self.config_path), self.config_path) - self.assertTrue(os.path.isdir(self.path2docs), self.path2docs) - - def test_line_label_tasker_size1(self) -> None: - tasker = self._get_line_label_classifier() - task_cnt = 0 - for task_path in tasker.create_tasks(task_size=10): - task_cnt += 1 - self._test_task_archive(task_path) - - def test_line_label_tasker_size2(self) -> None: - tasker = self._get_line_label_classifier() - - task_cnt = 0 - for task_path in tasker.create_tasks(task_size=2): - task_cnt += 1 - self._test_task_archive(task_path) - - def test_tasker(self) -> None: - with tempfile.TemporaryDirectory() as tmpdir: - taskers = { - "law_classifier": LineLabelTasker( - path2bboxes=self.path2bboxes, - path2lines=self.path2lines, - path2docs=self.path2docs, - manifest_path=self.manifest_path, - config_path=self.config_path, - tmp_dir=tmpdir, - config=get_test_config() - ) - } - tasker = Tasker(boxes_label_path=self.path2bboxes, - line_info_path=self.path2lines, - images_path=self.path2docs, - save_path=tmpdir, - concrete_taskers=taskers, - config=get_test_config()) - tasks_path, task_size = tasker.create_tasks(type_of_task="law_classifier", task_size=1) - self.assertTrue(os.path.isfile(tasks_path)) - self.assertEqual(1, task_size) - with ZipFile(tasks_path) as archive: - self.assertIn("original_documents.zip", archive.namelist()) - - def _get_line_label_classifier(self) -> LineLabelTasker: - config = get_test_config() - tasker = LineLabelTasker( - path2bboxes=self.path2bboxes, - path2lines=self.path2lines, - path2docs=self.path2docs, - manifest_path=self.manifest_path, - config_path=self.config_path, - tmp_dir="/tmp/tasker_test", - config=config - ) - return tasker - - def _test_task_archive(self, task_path: str) -> None: - self.assertTrue(os.path.isfile(task_path)) - with ZipFile(task_path) as archive: - namelist = [name.split("/", maxsplit=1)[-1] for name in archive.namelist()] - self.assertIn("test_config.json", namelist) - 
self.assertIn("test_manifest.md", namelist) - images_paths = [image for image in archive.namelist() if "_img_bbox_" in image] - self.assertTrue(len(images_paths) > 0) - for image_path in images_paths: - with archive.open(image_path) as image_file: - image = Image.open(image_file) - self.assertEqual((1276, 1754), image.size) - - def test_images_creators(self) -> None: - test_dict = {"english_doc.docx": 3, "txt_example.txt": 7} - config = get_test_config() - config["labeling_mode"] = True - path2docs = get_path_original_documents(config) - files_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "data", "images_creator")) - images_creators = [DocxImagesCreator(path2docs, config=get_test_config()), TxtImagesCreator(path2docs, config=get_test_config())] - - test_manager = DedocManager(manager_config=self.__create_test_manager_config(config), config=config) - for doc in os.listdir(files_dir): - if not doc.endswith(("docx", "txt")): - continue - - with tempfile.TemporaryDirectory() as tmp_dir: - with zipfile.ZipFile(os.path.join(tmp_dir, "archive.zip"), "w") as archive: - _ = test_manager.parse(file_path=os.path.join(files_dir, doc), parameters=dict(document_type="law")) - lines_path = os.path.join(config["intermediate_data_path"], "lines.jsonlines") - self.assertTrue(os.path.isfile(lines_path)) - with open(lines_path, "r") as f: - lines = [json.loads(line) for line in f] - original_doc = lines[0]["original_document"] - path = os.path.join(get_path_original_documents(config), original_doc) - self.assertTrue(os.path.isfile(path)) - for images_creator in images_creators: - if images_creator.can_read(lines): - images_creator.add_images(page=lines, archive=archive) - break - self.assertEqual(len(archive.namelist()), test_dict[doc]) - os.remove(path) - os.remove(lines_path) - - config.pop("labeling_mode") - - def __create_test_manager_config(self, config: dict) -> dict: - readers = [DocxReader(config=config), RawTextReader(config=config)] - 
metadata_extractors = [BaseMetadataExtractor()] - law_extractors = { - FoivLawStructureExtractor.document_type: FoivLawStructureExtractor(config=config), - LawStructureExtractor.document_type: LawStructureExtractor(config=config) - } - structure_extractors = { - DefaultStructureExtractor.document_type: DefaultStructureExtractor(), - ClassifyingLawStructureExtractor.document_type: ClassifyingLawStructureExtractor(extractors=law_extractors, config=config) - } - - return dict( - converter=ConverterComposition(converters=[]), - reader=ReaderComposition(readers=readers), - structure_extractor=StructureExtractorComposition(extractors=structure_extractors, default_key="other"), - structure_constructor=StructureConstructorComposition(default_constructor=TreeConstructor(), constructors={"tree": TreeConstructor()}), - document_metadata_extractor=MetadataExtractorComposition(extractors=metadata_extractors), - attachments_handler=AttachmentsHandler(config=config) - ) diff --git a/tests/unit_tests/test_module_font_classifier.py b/tests/unit_tests/test_module_font_classifier.py index 29194dc3..513220bf 100644 --- a/tests/unit_tests/test_module_font_classifier.py +++ b/tests/unit_tests/test_module_font_classifier.py @@ -7,6 +7,7 @@ from dedoc.data_structures import BoldAnnotation from dedoc.readers.pdf_reader.data_classes.page_with_bboxes import PageWithBBox from dedoc.readers.pdf_reader.data_classes.text_with_bbox import TextWithBBox +from dedoc.readers.pdf_reader.data_classes.word_with_bbox import WordWithBBox from dedoc.readers.pdf_reader.pdf_image_reader.line_metadata_extractor.font_type_classifier import FontTypeClassifier @@ -18,19 +19,76 @@ def get_page(self) -> PageWithBBox: image = cv2.imread(os.path.join(self.data_directory_path, "example.png")) bboxes = [ - TextWithBBox(bbox=BBox(79, 86, 214, 21), page_num=0, text="Пример документа", line_num=0), - TextWithBBox(bbox=BBox(79, 113, 627, 20), page_num=0, text="Глава 1 с таким длинным названием которое даже не влазит в", 
line_num=0), - TextWithBBox(bbox=BBox(80, 142, 132, 16), page_num=0, text="одну строчку.", line_num=0), - TextWithBBox(bbox=BBox(80, 163, 154, 15), page_num=0, text="Какие то определения", line_num=0), - TextWithBBox(bbox=BBox(79, 182, 65, 11), page_num=0, text="Статья 1", line_num=0), - TextWithBBox(bbox=BBox(79, 201, 166, 15), page_num=0, text="опрделения", line_num=0), - TextWithBBox(bbox=BBox(79, 220, 66, 11), page_num=0, text="Статья 2", line_num=0), - TextWithBBox(bbox=BBox(79, 239, 124, 14), page_num=0, text="Дадим пояснения", line_num=0), - TextWithBBox(bbox=BBox(81, 259, 203, 11), page_num=0, text="1.2.1 Поясним за непонятное", line_num=0), - TextWithBBox(bbox=BBox(81, 278, 191, 11), page_num=0, text="1.2.2. Поясним за понятное", line_num=0), - TextWithBBox(bbox=BBox(129, 297, 171, 15), page_num=0, text="а) это даже ежу понятно", line_num=0), - TextWithBBox(bbox=BBox(129, 315, 153, 16), page_num=0, text="6) это ежу не понятно", line_num=0), - TextWithBBox(bbox=BBox(81, 335, 30, 11), page_num=0, text="123", line_num=0), + TextWithBBox( + bbox=BBox(79, 86, 214, 21), page_num=0, line_num=0, + words=[WordWithBBox(text="Пример", bbox=BBox(79, 86, 89, 21)), WordWithBBox(text="документа", bbox=BBox(175, 90, 118, 17))] + ), + TextWithBBox( + bbox=BBox(79, 113, 627, 20), page_num=0, line_num=0, + words=[ + WordWithBBox(text="Глава", bbox=BBox(79, 113, 58, 15)), WordWithBBox(text="1", bbox=BBox(144, 113, 9, 15)), + WordWithBBox(text="c", bbox=BBox(160, 117, 8, 11)), WordWithBBox(text="таким", bbox=BBox(175, 117, 62, 11)), + WordWithBBox(text="длинным", bbox=BBox(243, 118, 94, 14)), WordWithBBox(text="названием", bbox=BBox(344, 117, 105, 11)), + WordWithBBox(text="которое", bbox=BBox(456, 117, 77, 16)), WordWithBBox(text="даже", bbox=BBox(539, 117, 48, 15)), + WordWithBBox(text="не", bbox=BBox(594, 117, 21, 11)), WordWithBBox(text="влазит", bbox=BBox(622, 117, 67, 11)), + WordWithBBox(text="в", bbox=BBox(695, 118, 11, 10)) + ] + ), + TextWithBBox( + bbox=BBox(80, 
142, 132, 16), page_num=0, line_num=0, + words=[WordWithBBox(text="одну", bbox=BBox(80, 142, 44, 16)), WordWithBBox(text="строчку.", bbox=BBox(131, 142, 81, 16))] + ), + TextWithBBox( + bbox=BBox(80, 163, 154, 15), page_num=0, line_num=0, + words=[ + WordWithBBox(text="Какие", bbox=BBox(80, 163, 41, 11)), WordWithBBox(text="то", bbox=BBox(126, 166, 14, 8)), + WordWithBBox(text="определения", bbox=BBox(146, 166, 88, 12)) + ] + ), + TextWithBBox( + bbox=BBox(79, 182, 65, 11), page_num=0, line_num=0, + words=[WordWithBBox(text="Статья", bbox=BBox(79, 182, 53, 11)), WordWithBBox(text="1", bbox=BBox(138, 182, 6, 11))] + ), + TextWithBBox(bbox=BBox(79, 201, 166, 15), page_num=0, line_num=0, words=[WordWithBBox(text="опрделения", bbox=BBox(164, 204, 81, 12))]), + TextWithBBox( + bbox=BBox(79, 220, 66, 11), page_num=0, line_num=0, + words=[WordWithBBox(text="Статья", bbox=BBox(79, 220, 53, 11)), WordWithBBox(text="2", bbox=BBox(138, 220, 7, 11))] + ), + TextWithBBox( + bbox=BBox(79, 239, 124, 14), page_num=0, line_num=0, + words=[WordWithBBox(text="Дадим", bbox=BBox(79, 239, 46, 14)), WordWithBBox(text="пояснения", bbox=BBox(130, 242, 73, 8))] + ), + TextWithBBox( + bbox=BBox(81, 259, 203, 11), page_num=0, line_num=0, + words=[ + WordWithBBox(text="1.2.1", bbox=BBox(81, 259, 30, 11)), WordWithBBox(text="Поясним", bbox=BBox(117, 259, 62, 11)), + WordWithBBox(text="за", bbox=BBox(185, 262, 12, 8)), WordWithBBox(text="непонятное", bbox=BBox(203, 262, 81, 8)) + ] + ), + TextWithBBox( + bbox=BBox(81, 278, 191, 11), page_num=0, line_num=0, + words=[ + WordWithBBox(text="1.2.2", bbox=BBox(81, 278, 34, 11)), WordWithBBox(text="Поясним", bbox=BBox(121, 278, 62, 11)), + WordWithBBox(text="за", bbox=BBox(189, 281, 13, 8)), WordWithBBox(text="понятное", bbox=BBox(207, 281, 65, 8)) + ] + ), + TextWithBBox( + bbox=BBox(129, 297, 171, 15), page_num=0, line_num=0, + words=[ + WordWithBBox(text="а)", bbox=BBox(129, 297, 11, 14)), WordWithBBox(text="это", bbox=BBox(146, 300, 21, 8)), + 
WordWithBBox(text="даже", bbox=BBox(172, 300, 34, 11)), WordWithBBox(text="ежу", bbox=BBox(211, 300, 26, 12)), + WordWithBBox(text="понятно", bbox=BBox(243, 300, 57, 8)) + ] + ), + TextWithBBox( + bbox=BBox(129, 315, 153, 16), page_num=0, line_num=0, + words=[ + WordWithBBox(text="б)", bbox=BBox(129, 315, 12, 15)), WordWithBBox(text="это", bbox=BBox(147, 319, 21, 8)), + WordWithBBox(text="ежу", bbox=BBox(174, 319, 26, 12)), WordWithBBox(text="не", bbox=BBox(205, 319, 15, 8)), + WordWithBBox(text="понятно", bbox=BBox(225, 319, 57, 8)) + ] + ), + TextWithBBox(bbox=BBox(81, 335, 30, 11), page_num=0, line_num=0, words=[WordWithBBox(text="1.23", bbox=BBox(81, 335, 30, 11))]), ] return PageWithBBox(image=image, bboxes=bboxes, page_num=0)