From 8d4a9fce378b617eac3f390ed7c3c396989bda5a Mon Sep 17 00:00:00 2001 From: FelixKirschKern Date: Mon, 2 Oct 2023 14:14:28 +0200 Subject: [PATCH 1/3] adds queries for heuristics by lt id, and any rla by lt id --- business_objects/information_source.py | 15 +++++++++++++++ business_objects/record_label_association.py | 19 ++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/business_objects/information_source.py b/business_objects/information_source.py index 1c165758..19194195 100644 --- a/business_objects/information_source.py +++ b/business_objects/information_source.py @@ -1,4 +1,5 @@ from datetime import datetime +from sqlalchemy import cast, TEXT from typing import Dict, List, Any, Optional from submodules.model import enums @@ -45,6 +46,20 @@ def get_all(project_id: str) -> List[InformationSource]: ) +def get_all_ids_by_labeling_task_id( + project_id: str, labeling_task_id: str +) -> List[str]: + values = ( + session.query(cast(InformationSource.id, TEXT)) + .filter( + InformationSource.project_id == project_id, + InformationSource.labeling_task_id == labeling_task_id, + ) + .all() + ) + return [value[0] for value in values] + + def get_all_statistics(project_id: str) -> List[InformationSourceStatistics]: return ( session.query(InformationSourceStatistics) diff --git a/business_objects/record_label_association.py b/business_objects/record_label_association.py index 932db31b..be0a452a 100644 --- a/business_objects/record_label_association.py +++ b/business_objects/record_label_association.py @@ -800,7 +800,7 @@ def check_label_duplication_classification( return True -def is_any_record_manually_labeled(project_id: str): +def is_any_record_manually_labeled(project_id: str) -> bool: query = f""" SELECT id FROM record_label_association rla @@ -812,6 +812,23 @@ def is_any_record_manually_labeled(project_id: str): return value is not None +def is_any_record_manually_labeled_by_lt_id( + project_id: str, labeling_task_id: str +) -> bool: + 
query = f""" + SELECT rla.id + FROM record_label_association rla + INNER JOIN labeling_task_label ltl + ON rla.labeling_task_label_id = ltl.id AND ltl.project_id = rla.project_id + WHERE rla.project_id = '{project_id}' + AND ltl.labeling_task_id = '{labeling_task_id}' + AND rla.source_type = '{enums.LabelSource.MANUAL.value}' + LIMIT 1 + """ + value = general.execute_first(query) + return value is not None + + def __get_base_query_valid_labels_manual_for_update( project_id: str, labeling_task_id: str = "", record_id: str = "" ) -> str: From 7ecd2d2e118aae9aa272c5dd57368c560528b3c6 Mon Sep 17 00:00:00 2001 From: FelixKirschKern Date: Mon, 9 Oct 2023 09:35:40 +0200 Subject: [PATCH 2/3] pr comment, combine methods with optional parameter --- business_objects/record_label_association.py | 30 +++++++++----------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/business_objects/record_label_association.py b/business_objects/record_label_association.py index be0a452a..1721ce94 100644 --- a/business_objects/record_label_association.py +++ b/business_objects/record_label_association.py @@ -800,29 +800,25 @@ def check_label_duplication_classification( return True -def is_any_record_manually_labeled(project_id: str) -> bool: - query = f""" - SELECT id - FROM record_label_association rla - WHERE project_id = '{project_id}' - AND source_type = '{enums.LabelSource.MANUAL.value}' - LIMIT 1 - """ - value = general.execute_first(query) - return value is not None - - -def is_any_record_manually_labeled_by_lt_id( - project_id: str, labeling_task_id: str +def is_any_record_manually_labeled( + project_id: str, labeling_task_id: Optional[str] = None ) -> bool: + query_join_add = "" + query_where_add = "" + if labeling_task_id: + query_join_add = """ + INNER JOIN labeling_task_label ltl + ON rla.labeling_task_label_id = ltl.id AND ltl.project_id = rla.project_id""" + query_where_add = f""" + AND ltl.labeling_task_id = '{labeling_task_id}'""" + query = f""" SELECT rla.id 
FROM record_label_association rla - INNER JOIN labeling_task_label ltl - ON rla.labeling_task_label_id = ltl.id AND ltl.project_id = rla.project_id + {query_join_add} WHERE rla.project_id = '{project_id}' - AND ltl.labeling_task_id = '{labeling_task_id}' AND rla.source_type = '{enums.LabelSource.MANUAL.value}' + {query_where_add} LIMIT 1 """ value = general.execute_first(query) From cac83340fb2e2742ef0859827537f51eea24379e Mon Sep 17 00:00:00 2001 From: FelixKirschKern Date: Mon, 9 Oct 2023 16:20:53 +0200 Subject: [PATCH 3/3] removes unused method --- business_objects/information_source.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/business_objects/information_source.py b/business_objects/information_source.py index 19194195..bbefa2da 100644 --- a/business_objects/information_source.py +++ b/business_objects/information_source.py @@ -105,23 +105,6 @@ def get_selected_information_sources(project_id: str) -> str: return ", ".join([str(x.name) for x in information_sources]) -def get_task_information_sources(project_id: str, labeling_task_id: str) -> str: - information_sources = ( - session.query(InformationSource.name) - .filter( - InformationSource.project_id == project_id, - InformationSource.labeling_task_id == labeling_task_id, - InformationSourceStatistics.source_id == InformationSource.id, - InformationSourceStatistics.true_positives - > 0, # only collect valid options - ) - .all() - ) - if not information_sources: - return "" - return ", ".join([str(x.name) for x in information_sources]) - - def get_payloads_by_project_id(project_id: str) -> List[Any]: query: str = f""" SELECT