From 23b2898f9e1e5a6c56448ee68b0f42184bb709e1 Mon Sep 17 00:00:00 2001 From: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Date: Thu, 2 May 2024 14:03:50 -0300 Subject: [PATCH 001/105] Update Finetuner search metadata functional tests (#172) --- tests/functional/finetune/data/finetune_test_end2end.json | 8 ++++---- tests/functional/finetune/finetune_functional_test.py | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/functional/finetune/data/finetune_test_end2end.json b/tests/functional/finetune/data/finetune_test_end2end.json index ead1fd88..80768de9 100644 --- a/tests/functional/finetune/data/finetune_test_end2end.json +++ b/tests/functional/finetune/data/finetune_test_end2end.json @@ -10,17 +10,17 @@ { "model_name": "aiR", "model_id": "6499cc946eb5633de15d82a1", - "dataset_name": "Test search dataset metadata", + "dataset_name": "Test search dataset", "inference_data": "Hello!", "required_dev": false, - "search_metadata": true + "search_metadata": false }, { "model_name": "vectara", "model_id": "655e20f46eb563062a1aa301", - "dataset_name": "Test search dataset metadata", + "dataset_name": "Test search dataset", "inference_data": "Hello!", "required_dev": false, - "search_metadata": true + "search_metadata": false } ] \ No newline at end of file diff --git a/tests/functional/finetune/finetune_functional_test.py b/tests/functional/finetune/finetune_functional_test.py index 94693f05..ffa9ad5a 100644 --- a/tests/functional/finetune/finetune_functional_test.py +++ b/tests/functional/finetune/finetune_functional_test.py @@ -83,6 +83,8 @@ def test_end2end(run_input_map): time.sleep(5) end = time.time() assert finetune_model.check_finetune_status().model_status.value == "onboarded" + time.sleep(30) + print(f"Model dict: {finetune_model.__dict__}") result = finetune_model.run(run_input_map["inference_data"]) print(f"Result: {result}") assert result is not None From 208a0814ff2adbb562f923993759f265bd57c0f2 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Thu, 2 May 2024 14:09:12 -0300 Subject: [PATCH 002/105] Downgrade dataclasses-json for compatibility (#170) Co-authored-by: Thiago Castro Ferreira --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9ad67878..112c8f9a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ namespaces = true [project] name = "aiXplain" -version = "0.2.5rc" +version = "0.2.12" description = "aiXplain SDK adds AI functions to software." 
readme = "README.md" requires-python = ">=3.5, <4" @@ -51,7 +51,7 @@ dependencies = [ "filetype>=1.2.0", "click>=7.1.2,<8.0.0", "PyYAML>=6.0.1", - "dataclasses-json==0.6.1" + "dataclasses-json>=0.5.2" ] [project.urls] From a837e1a5777ee12de2a1bddb991b4c47510247f1 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Fri, 10 May 2024 11:33:49 -0300 Subject: [PATCH 003/105] Fix model cost parameters (#179) Co-authored-by: Thiago Castro Ferreira --- aixplain/factories/model_factory.py | 22 +++++++++---------- aixplain/modules/asset.py | 3 ++- aixplain/modules/metric.py | 15 ++++++++----- aixplain/modules/model.py | 12 ++++++---- .../general_assets/asset_functional_test.py | 4 ++-- 5 files changed, 31 insertions(+), 25 deletions(-) diff --git a/aixplain/factories/model_factory.py b/aixplain/factories/model_factory.py index cd7de970..9ed3138f 100644 --- a/aixplain/factories/model_factory.py +++ b/aixplain/factories/model_factory.py @@ -65,7 +65,7 @@ def _create_model_from_response(cls, response: Dict) -> Model: response["name"], supplier=response["supplier"], api_key=response["api_key"], - pricing=response["pricing"], + cost=response["pricing"], function=Function(response["function"]["id"]), parameters=parameters, is_subscribed=True if "subscription" in response else False, @@ -404,9 +404,11 @@ def onboard_model(cls, model_id: Text, image_tag: Text, image_hash: Text, api_ke message = "Your onboarding request has been submitted to an aiXplain specialist for finalization. We will notify you when the process is completed." logging.info(message) return response - + @classmethod - def deploy_huggingface_model(cls, name: Text, hf_repo_id: Text, hf_token: Optional[Text] = "", api_key: Optional[Text] = None) -> Dict: + def deploy_huggingface_model( + cls, name: Text, hf_repo_id: Text, hf_token: Optional[Text] = "", api_key: Optional[Text] = None + ) -> Dict: """Onboards and deploys a Hugging Face large language model. Args: @@ -433,20 +435,16 @@ def deploy_huggingface_model(cls, name: Text, hf_repo_id: Text, hf_token: Option "sourceLanguage": "en", }, "source": "huggingface", - "onboardingParams": { - "hf_model_name": model_name, - "hf_supplier": supplier, - "hf_token": hf_token - } + "onboardingParams": {"hf_model_name": model_name, "hf_supplier": supplier, "hf_token": hf_token}, } response = _request_with_retry("post", deploy_url, headers=headers, json=body) logging.debug(response.text) response_dicts = json.loads(response.text) return response_dicts - + @classmethod def get_huggingface_model_status(cls, model_id: Text, api_key: Optional[Text] = None): - """Gets the on-boarding status of a Hugging Face model with ID MODEL_ID. + """Gets the on-boarding status of a Hugging Face model with ID MODEL_ID. 
Args: model_id (Text): The model's ID as returned by DEPLOY_HUGGINGFACE_MODEL @@ -466,6 +464,6 @@ def get_huggingface_model_status(cls, model_id: Text, api_key: Optional[Text] = "status": response_dicts["status"], "name": response_dicts["name"], "id": response_dicts["id"], - "pricing": response_dicts["pricing"] + "pricing": response_dicts["pricing"], } - return ret_dict \ No newline at end of file + return ret_dict diff --git a/aixplain/modules/asset.py b/aixplain/modules/asset.py index 34fea4e4..52b79912 100644 --- a/aixplain/modules/asset.py +++ b/aixplain/modules/asset.py @@ -36,7 +36,7 @@ def __init__( version: Text = "1.0", license: Optional[License] = None, privacy: Privacy = Privacy.PRIVATE, - cost: float = 0, + cost: Optional[Union[Dict, float]] = None, ) -> None: """Create an Asset with the necessary information @@ -46,6 +46,7 @@ def __init__( description (Text): Description of the Asset supplier (Union[Dict, Text, Supplier, int], optional): supplier of the asset. Defaults to "aiXplain". version (Optional[Text], optional): asset version. Defaults to "1.0". + cost (Optional[Union[Dict, float]], optional): asset price. Defaults to None. """ self.id = id self.name = name diff --git a/aixplain/modules/metric.py b/aixplain/modules/metric.py index 04a0bdd7..d591772b 100644 --- a/aixplain/modules/metric.py +++ b/aixplain/modules/metric.py @@ -61,12 +61,10 @@ def __init__( supplier (Text): author of the Metric is_reference_required (bool): does the metric use reference is_source_required (bool): does the metric use source - cost (float): cost of the metric + cost (float): price of the metric normalization_options(list, []) **additional_info: Any additional Metric info to be saved """ - - super().__init__(id, name, description="", supplier=supplier, version="1.0", cost=cost) self.is_source_required = is_source_required self.is_reference_required = is_reference_required @@ -76,7 +74,7 @@ def __init__( def __repr__(self) -> str: return f"" - + def add_normalization_options(self, normalization_options: List[str]): """Add a given set of normalization options to be used while benchmarking @@ -85,7 +83,12 @@ def add_normalization_options(self, normalization_options: List[str]): """ self.normalization_options.append(normalization_options) - def run(self, hypothesis: Optional[Union[str, List[str]]]=None, source: Optional[Union[str, List[str]]]=None, reference: Optional[Union[str, List[str]]]=None): + def run( + self, + hypothesis: Optional[Union[str, List[str]]] = None, + source: Optional[Union[str, List[str]]] = None, + reference: Optional[Union[str, List[str]]] = None, + ): """Run the metric to calculate the scores. Args: @@ -94,6 +97,7 @@ def run(self, hypothesis: Optional[Union[str, List[str]]]=None, source: Optional reference (Optional[Union[str, List[str]]], optional): Can give a single reference or a list of references for metric calculation. Defaults to None. """ from aixplain.factories.model_factory import ModelFactory + model = ModelFactory.get(self.id) payload = { "function": self.function, @@ -115,4 +119,3 @@ def run(self, hypothesis: Optional[Union[str, List[str]]]=None, source: Optional reference = [[ref] for ref in reference] payload["references"] = reference return model.run(payload) - diff --git a/aixplain/modules/model.py b/aixplain/modules/model.py index fc3a82cd..983737c7 100644 --- a/aixplain/modules/model.py +++ b/aixplain/modules/model.py @@ -48,6 +48,7 @@ class Model(Asset): function (Text, optional): model AI function. Defaults to None. url (str): URL to run the model. 
backend_url (str): URL of the backend. + pricing (Dict, optional): model price. Defaults to None. **additional_info: Any additional Model info to be saved """ @@ -61,6 +62,7 @@ def __init__( version: Optional[Text] = None, function: Optional[Text] = None, is_subscribed: bool = False, + cost: Optional[Dict] = None, **additional_info, ) -> None: """Model Init @@ -74,9 +76,10 @@ def __init__( version (Text, optional): version of the model. Defaults to "1.0". function (Text, optional): model AI function. Defaults to None. is_subscribed (bool, optional): Is the user subscribed. Defaults to False. + cost (Dict, optional): model price. Defaults to None. **additional_info: Any additional Model info to be saved """ - super().__init__(id, name, description, supplier, version) + super().__init__(id, name, description, supplier, version, cost=cost) self.api_key = api_key self.additional_info = additional_info self.url = config.MODELS_RUN_URL @@ -264,6 +267,7 @@ def check_finetune_status(self, after_epoch: Optional[int] = None): """ from aixplain.enums.asset_status import AssetStatus from aixplain.modules.finetune.status import FinetuneStatus + headers = {"x-api-key": self.api_key, "Content-Type": "application/json"} resp = None try: @@ -274,7 +278,7 @@ def check_finetune_status(self, after_epoch: Optional[int] = None): finetune_status = AssetStatus(resp["finetuneStatus"]) model_status = AssetStatus(resp["modelStatus"]) logs = sorted(resp["logs"], key=lambda x: float(x["epoch"])) - + target_epoch = None if after_epoch is not None: logs = [log for log in logs if float(log["epoch"]) > after_epoch] @@ -282,7 +286,7 @@ def check_finetune_status(self, after_epoch: Optional[int] = None): target_epoch = float(logs[0]["epoch"]) elif len(logs) > 0: target_epoch = float(logs[-1]["epoch"]) - + if target_epoch is not None: log = None for log_ in logs: @@ -294,7 +298,7 @@ def check_finetune_status(self, after_epoch: Optional[int] = None): log["trainLoss"] = log_["trainLoss"] if log_["evalLoss"] is not None: log["evalLoss"] = log_["evalLoss"] - + status = FinetuneStatus( status=finetune_status, model_status=model_status, diff --git a/tests/functional/general_assets/asset_functional_test.py b/tests/functional/general_assets/asset_functional_test.py index 6a9dceda..93a3b297 100644 --- a/tests/functional/general_assets/asset_functional_test.py +++ b/tests/functional/general_assets/asset_functional_test.py @@ -82,8 +82,8 @@ def test_model_sort(): prev_model = models[idx - 1] model = models[idx] - prev_model_price = prev_model.additional_info["pricing"]["price"] - model_price = model.additional_info["pricing"]["price"] + prev_model_price = prev_model.cost["price"] + model_price = model.cost["price"] assert prev_model_price >= model_price From 754f478cf94f0a0af4660242d44312cededf335e Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Wed, 15 May 2024 19:46:58 -0300 Subject: [PATCH 004/105] Treat label URLs (#176) Co-authored-by: Thiago Castro Ferreira --- aixplain/processes/data_onboarding/process_media_files.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/aixplain/processes/data_onboarding/process_media_files.py b/aixplain/processes/data_onboarding/process_media_files.py index 8b333d72..62fd369a 100644 --- a/aixplain/processes/data_onboarding/process_media_files.py +++ b/aixplain/processes/data_onboarding/process_media_files.py @@ -5,6 +5,7 @@ import pandas as pd import shutil import tarfile +import validators from aixplain.enums.data_subtype 
import DataSubtype from aixplain.enums.data_type import DataType @@ -115,6 +116,13 @@ def run(metadata: MetaData, paths: List, folder: Path, batch_size: int = 100) -> shutil.copy2(media_path, new_path) batch.append(fname) else: + if metadata.storage_type == StorageType.TEXT and ( + str(media_path).startswith("s3://") + or str(media_path).startswith("http://") + or str(media_path).startswith("https://") + or validators.url(media_path) + ): + media_path = "DONOTDOWNLOAD" + str(media_path) batch.append(media_path) # crop intervals can not be used with interval data types From f1c9935c53afd7185243a455d9026e9088a46b59 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Tue, 4 Jun 2024 13:48:28 -0300 Subject: [PATCH 005/105] Add new metric test (#181) * Add new metric test * Enable testing new pipeline executor --------- Co-authored-by: Thiago Castro Ferreira --- tests/functional/pipelines/run_test.py | 139 +++++++++++++++++++------ 1 file changed, 108 insertions(+), 31 deletions(-) diff --git a/tests/functional/pipelines/run_test.py b/tests/functional/pipelines/run_test.py index e4389587..e8bc4d9c 100644 --- a/tests/functional/pipelines/run_test.py +++ b/tests/functional/pipelines/run_test.py @@ -18,6 +18,7 @@ import pytest import os +import requests from aixplain.factories import DatasetFactory, PipelineFactory @@ -38,61 +39,110 @@ def test_get_pipeline(): assert hypothesis_pipeline.id == reference_pipeline.id -@pytest.mark.parametrize("batchmode", [True, False]) -def test_run_single_str(batchmode: bool): +@pytest.mark.parametrize( + "batchmode,version", + [ + (True, "2.0"), + (True, "3.0"), + (False, "2.0"), + (False, "3.0"), + ], +) +def test_run_single_str(batchmode: bool, version: str): pipeline = PipelineFactory.list(query="SingleNodePipeline")["results"][0] - response = pipeline.run(data="Translate this thing", **{"batchmode": batchmode}) + response = pipeline.run(data="Translate this thing", **{"batchmode": batchmode, "version": version}) assert response["status"] == "SUCCESS" -@pytest.mark.parametrize("batchmode", [True, False]) -def test_run_single_local_file(batchmode: bool): +@pytest.mark.parametrize( + "batchmode,version", + [ + (True, "2.0"), + (True, "3.0"), + (False, "2.0"), + (False, "3.0"), + ], +) +def test_run_single_local_file(batchmode: bool, version: str): pipeline = PipelineFactory.list(query="SingleNodePipeline")["results"][0] fname = "translate_this.txt" with open(fname, "w") as f: f.write("Translate this thing") - response = pipeline.run(data=fname, **{"batchmode": batchmode}) + response = pipeline.run(data=fname, **{"batchmode": batchmode, "version": version}) os.remove(fname) assert response["status"] == "SUCCESS" -@pytest.mark.parametrize("batchmode", [True, False]) -def test_run_with_url(batchmode: bool): +@pytest.mark.parametrize( + "batchmode,version", + [ + (True, "2.0"), + (True, "3.0"), + (False, "2.0"), + (False, "3.0"), + ], +) +def test_run_with_url(batchmode: bool, version: str): pipeline = PipelineFactory.list(query="SingleNodePipeline")["results"][0] response = pipeline.run( data="https://aixplain-platform-assets.s3.amazonaws.com/data/dev/64c81163f8bdcac7443c2dad/data/f8.txt", - **{"batchmode": batchmode} + **{"batchmode": batchmode, "version": version} ) assert response["status"] == "SUCCESS" -@pytest.mark.parametrize("batchmode", [True, False]) -def test_run_with_dataset(batchmode: bool): +@pytest.mark.parametrize( + "batchmode,version", + [ + (True, "2.0"), + (True, "3.0"), + (False, "2.0"), 
+ (False, "3.0"), + ], +) +def test_run_with_dataset(batchmode: bool, version: str): dataset = DatasetFactory.list(query="for_functional_tests")["results"][0] data_asset_id = dataset.id data_id = dataset.source_data["en"].id pipeline = PipelineFactory.list(query="SingleNodePipeline")["results"][0] - response = pipeline.run(data=data_id, data_asset=data_asset_id, **{"batchmode": batchmode}) + response = pipeline.run(data=data_id, data_asset=data_asset_id, **{"batchmode": batchmode, "version": version}) assert response["status"] == "SUCCESS" -@pytest.mark.parametrize("batchmode", [True, False]) -def test_run_multipipe_with_strings(batchmode: bool): +@pytest.mark.parametrize( + "batchmode,version", + [ + (True, "2.0"), + (True, "3.0"), + (False, "2.0"), + (False, "3.0"), + ], +) +def test_run_multipipe_with_strings(batchmode: bool, version: str): pipeline = PipelineFactory.list(query="MultiInputPipeline")["results"][0] response = pipeline.run( - data={"Input": "Translate this thing.", "Reference": "Traduza esta coisa."}, **{"batchmode": batchmode} + data={"Input": "Translate this thing.", "Reference": "Traduza esta coisa."}, + **{"batchmode": batchmode, "version": version} ) assert response["status"] == "SUCCESS" -@pytest.mark.parametrize("batchmode", [True, False]) -def test_run_multipipe_with_datasets(batchmode: bool): +@pytest.mark.parametrize( + "batchmode,version", + [ + (True, "2.0"), + (True, "3.0"), + (False, "2.0"), + (False, "3.0"), + ], +) +def test_run_multipipe_with_datasets(batchmode: bool, version: str): pipeline = PipelineFactory.list(query="MultiInputPipeline")["results"][0] dataset = DatasetFactory.list(query="for_functional_tests")["results"][0] @@ -104,27 +154,50 @@ def test_run_multipipe_with_datasets(batchmode: bool): response = pipeline.run( data={"Input": input_id, "Reference": reference_id}, data_asset={"Input": data_asset_id, "Reference": data_asset_id}, - **{"batchmode": batchmode} + **{"batchmode": batchmode, "version": version} ) assert response["status"] == "SUCCESS" -def test_run_segment_reconstruct(): +@pytest.mark.parametrize("version", ["2.0", "3.0"]) +def test_run_segment_reconstruct(version: str): pipeline = PipelineFactory.list(query="Segmentation/Reconstruction Functional Test - DO NOT DELETE")["results"][0] - response = pipeline.run("https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav") + response = pipeline.run("https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", **{"version": version}) assert response["status"] == "SUCCESS" output = response["data"][0] assert output["label"] == "Output 1" -def test_run_metric(): +@pytest.mark.parametrize("version", ["2.0", "3.0"]) +def test_run_translation_metric(version: str): + dataset = DatasetFactory.list(query="for_functional_tests")["results"][0] + data_asset_id = dataset.id + + reference_id = dataset.target_data["pt"][0].id + + pipeline = PipelineFactory.list(query="Translation Metric Functional Test - DO NOT DELETE")["results"][0] + response = pipeline.run( + data={"TextInput": reference_id, "ReferenceInput": reference_id}, + data_asset={"TextInput": data_asset_id, "ReferenceInput": data_asset_id}, + **{"version": version} + ) + + assert response["status"] == "SUCCESS" + data = response["data"][0]["segments"][0]["response"] + data = requests.get(data).text + assert float(data) == 100.0 + + +@pytest.mark.parametrize("version", ["2.0", "3.0"]) +def test_run_metric(version: str): pipeline = PipelineFactory.list(query="ASR Metric Functional Test - DO NOT 
DELETE")["results"][0] response = pipeline.run( { "AudioInput": "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", "ReferenceInput": "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.txt", - } + }, + **{"version": version} ) assert response["status"] == "SUCCESS" @@ -134,15 +207,17 @@ def test_run_metric(): @pytest.mark.parametrize( - "input_data,output_data", + "input_data,output_data,version", [ - ("https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", "AudioOutput"), - ("https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.txt", "TextOutput"), + ("https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", "AudioOutput", "2.0"), + ("https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.txt", "TextOutput", "2.0"), + ("https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", "AudioOutput", "3.0"), + ("https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.txt", "TextOutput", "3.0"), ], ) -def test_run_router(input_data: str, output_data: str): +def test_run_router(input_data: str, output_data: str, version: str): pipeline = PipelineFactory.list(query="Router Test - DO NOT DELETE")["results"][0] - response = pipeline.run(input_data) + response = pipeline.run(input_data, **{"version": version}) assert response["status"] == "SUCCESS" assert response["data"][0]["label"] == output_data @@ -151,13 +226,15 @@ def test_run_router(input_data: str, output_data: str): @pytest.mark.parametrize( "input_data,output_data", [ - ("I love it.", "PositiveOutput"), - ("I hate it.", "NegativeOutput"), + ("I love it.", "PositiveOutput", "2.0"), + ("I hate it.", "NegativeOutput", "2.0"), + ("I love it.", "PositiveOutput", "3.0"), + ("I hate it.", "NegativeOutput", "3.0"), ], ) -def test_run_decision(input_data: str, output_data: str): +def test_run_decision(input_data: str, output_data: str, version: str): pipeline = PipelineFactory.list(query="Decision Test - DO NOT DELETE")["results"][0] - response = pipeline.run(input_data) + response = pipeline.run(input_data, **{"version": version}) assert response["status"] == "SUCCESS" assert response["data"][0]["label"] == output_data From a48ccfd6be9ceaba774cbf50cc0b07f5d87299aa Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Wed, 5 Jun 2024 10:01:47 -0300 Subject: [PATCH 006/105] LLMModel class and parameters (#184) * LLMModel class and parameters * Change in the documentation * Changing LLMModel for LLM * Remove frequency penalty --------- Co-authored-by: Thiago Castro Ferreira --- .pre-commit-config.yaml | 9 +- aixplain/factories/model_factory.py | 24 +- aixplain/modules/__init__.py | 1 + .../modules/{model.py => model/__init__.py} | 8 +- aixplain/modules/model/llm_model.py | 227 ++++++++++++++++++ docs/user/user_doc.md | 18 ++ .../general_assets/asset_functional_test.py | 10 +- tests/functional/model/run_model_test.py | 22 ++ 8 files changed, 304 insertions(+), 15 deletions(-) rename aixplain/modules/{model.py => model/__init__.py} (98%) create mode 100644 aixplain/modules/model/llm_model.py create mode 100644 tests/functional/model/run_model_test.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1395dfa6..a79973ee 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,4 +15,11 @@ repos: - id: black language_version: python3 args: # arguments to configure black - - --line-length=128 \ No newline at end of file + - 
--line-length=128 + + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v2.0.0 # Use the latest version + hooks: + - id: flake8 + args: # arguments to configure black + - --ignore=E402,E501 \ No newline at end of file diff --git a/aixplain/factories/model_factory.py b/aixplain/factories/model_factory.py index 9ed3138f..221fd94d 100644 --- a/aixplain/factories/model_factory.py +++ b/aixplain/factories/model_factory.py @@ -24,6 +24,7 @@ import json import logging from aixplain.modules.model import Model +from aixplain.modules.model.llm_model import LLM from aixplain.enums import Function, Language, OwnershipType, Supplier, SortBy, SortOrder from aixplain.utils import config from aixplain.utils.file_utils import _request_with_retry @@ -60,13 +61,18 @@ def _create_model_from_response(cls, response: Dict) -> Model: if "language" in param["name"]: parameters[param["name"]] = [w["value"] for w in param["values"]] - return Model( + function = Function(response["function"]["id"]) + ModelClass = Model + if function == Function.TEXT_GENERATION: + ModelClass = LLM + + return ModelClass( response["id"], response["name"], supplier=response["supplier"], api_key=response["api_key"], cost=response["pricing"], - function=Function(response["function"]["id"]), + function=function, parameters=parameters, is_subscribed=True if "subscription" in response else False, version=response["version"]["id"], @@ -100,7 +106,7 @@ def get(cls, model_id: Text, api_key: Optional[Text] = None) -> Model: model = cls._create_model_from_response(resp) logging.info(f"Model Creation: Model {model_id} instantiated.") return model - except Exception as e: + except Exception: if resp is not None and "statusCode" in resp: status_code = resp["statusCode"] message = resp["message"] @@ -135,7 +141,7 @@ def _get_assets_from_page( sort_order: SortOrder = SortOrder.ASCENDING, ) -> List[Model]: try: - url = urljoin(cls.backend_url, f"sdk/models/paginate") + url = urljoin(cls.backend_url, "sdk/models/paginate") filter_params = {"q": query, "pageNumber": page_number, "pageSize": page_size} if is_finetunable is not None: filter_params["isFineTunable"] = is_finetunable @@ -253,7 +259,7 @@ def list_host_machines(cls, api_key: Optional[Text] = None) -> List[Dict]: List[Dict]: List of dictionaries containing information about each hosting machine. """ - machines_url = urljoin(config.BACKEND_URL, f"sdk/hosting-machines") + machines_url = urljoin(config.BACKEND_URL, "sdk/hosting-machines") logging.debug(f"URL: {machines_url}") if api_key: headers = {"x-api-key": f"{api_key}", "Content-Type": "application/json"} @@ -278,7 +284,7 @@ def list_functions(cls, verbose: Optional[bool] = False, api_key: Optional[Text] List[Dict]: List of dictionaries containing information about each supported function. 
""" - functions_url = urljoin(config.BACKEND_URL, f"sdk/functions") + functions_url = urljoin(config.BACKEND_URL, "sdk/functions") logging.debug(f"URL: {functions_url}") if api_key: headers = {"x-api-key": f"{api_key}", "Content-Type": "application/json"} @@ -336,7 +342,7 @@ def create_asset_repo( function_id = function_dict["id"] if function_id is None: raise Exception("Invalid function name") - create_url = urljoin(config.BACKEND_URL, f"sdk/models/register") + create_url = urljoin(config.BACKEND_URL, "sdk/models/register") logging.debug(f"URL: {create_url}") if api_key: headers = {"x-api-key": f"{api_key}", "Content-Type": "application/json"} @@ -370,7 +376,7 @@ def asset_repo_login(cls, api_key: Optional[Text] = None) -> Dict: Returns: Dict: Backend response """ - login_url = urljoin(config.BACKEND_URL, f"sdk/ecr/login") + login_url = urljoin(config.BACKEND_URL, "sdk/ecr/login") logging.debug(f"URL: {login_url}") if api_key: headers = {"x-api-key": f"{api_key}", "Content-Type": "application/json"} @@ -420,7 +426,7 @@ def deploy_huggingface_model( Dict: Backend response """ supplier, model_name = hf_repo_id.split("/") - deploy_url = urljoin(config.BACKEND_URL, f"sdk/model-onboarding/onboard") + deploy_url = urljoin(config.BACKEND_URL, "sdk/model-onboarding/onboard") if api_key: headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"} else: diff --git a/aixplain/modules/__init__.py b/aixplain/modules/__init__.py index bb9e696b..488c8c2f 100644 --- a/aixplain/modules/__init__.py +++ b/aixplain/modules/__init__.py @@ -27,6 +27,7 @@ from .metadata import MetaData from .metric import Metric from .model import Model +from .model.llm_model import LLM from .pipeline import Pipeline from .finetune import Finetune, FinetuneCost from .finetune.status import FinetuneStatus diff --git a/aixplain/modules/model.py b/aixplain/modules/model/__init__.py similarity index 98% rename from aixplain/modules/model.py rename to aixplain/modules/model/__init__.py index 983737c7..12c96977 100644 --- a/aixplain/modules/model.py +++ b/aixplain/modules/model/__init__.py @@ -45,7 +45,7 @@ class Model(Asset): url (Text, optional): endpoint of the model. Defaults to config.MODELS_RUN_URL. supplier (Union[Dict, Text, Supplier, int], optional): supplier of the asset. Defaults to "aiXplain". version (Text, optional): version of the model. Defaults to "1.0". - function (Text, optional): model AI function. Defaults to None. + function (Function, optional): model AI function. Defaults to None. url (str): URL to run the model. backend_url (str): URL of the backend. pricing (Dict, optional): model price. Defaults to None. @@ -60,7 +60,7 @@ def __init__( api_key: Optional[Text] = None, supplier: Union[Dict, Text, Supplier, int] = "aiXplain", version: Optional[Text] = None, - function: Optional[Text] = None, + function: Optional[Function] = None, is_subscribed: bool = False, cost: Optional[Dict] = None, **additional_info, @@ -102,7 +102,7 @@ def __repr__(self): except Exception: return f"" - def __polling(self, poll_url: Text, name: Text = "model_process", wait_time: float = 0.5, timeout: float = 300) -> Dict: + def sync_poll(self, poll_url: Text, name: Text = "model_process", wait_time: float = 0.5, timeout: float = 300) -> Dict: """Keeps polling the platform to check whether an asynchronous call is done. 
Args: @@ -198,7 +198,7 @@ def run( return response poll_url = response["url"] end = time.time() - response = self.__polling(poll_url, name=name, timeout=timeout, wait_time=wait_time) + response = self.sync_poll(poll_url, name=name, timeout=timeout, wait_time=wait_time) return response except Exception as e: msg = f"Error in request for {name} - {traceback.format_exc()}" diff --git a/aixplain/modules/model/llm_model.py b/aixplain/modules/model/llm_model.py new file mode 100644 index 00000000..349ea595 --- /dev/null +++ b/aixplain/modules/model/llm_model.py @@ -0,0 +1,227 @@ +__author__ = "lucaspavanelli" + +""" +Copyright 2024 The aiXplain SDK authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Author: Thiago Castro Ferreira, Shreyas Sharma and Lucas Pavanelli +Date: June 4th 2024 +Description: + Large Language Model Class +""" +import time +import json +import logging +import traceback +from aixplain.factories.file_factory import FileFactory +from aixplain.enums import Function, Supplier +from aixplain.modules.model import Model +from aixplain.utils import config +from aixplain.utils.file_utils import _request_with_retry +from typing import Union, Optional, List, Text, Dict + + +class LLM(Model): + """Ready-to-use LLM model. This model can be run in both synchronous and asynchronous manner. + + Attributes: + id (Text): ID of the Model + name (Text): Name of the Model + description (Text, optional): description of the model. Defaults to "". + api_key (Text, optional): API key of the Model. Defaults to None. + url (Text, optional): endpoint of the model. Defaults to config.MODELS_RUN_URL. + supplier (Union[Dict, Text, Supplier, int], optional): supplier of the asset. Defaults to "aiXplain". + version (Text, optional): version of the model. Defaults to "1.0". + function (Text, optional): model AI function. Defaults to None. + url (str): URL to run the model. + backend_url (str): URL of the backend. + pricing (Dict, optional): model price. Defaults to None. + **additional_info: Any additional Model info to be saved + """ + + def __init__( + self, + id: Text, + name: Text, + description: Text = "", + api_key: Optional[Text] = None, + supplier: Union[Dict, Text, Supplier, int] = "aiXplain", + version: Optional[Text] = None, + function: Optional[Function] = None, + is_subscribed: bool = False, + cost: Optional[Dict] = None, + **additional_info, + ) -> None: + """LLM Init + + Args: + id (Text): ID of the Model + name (Text): Name of the Model + description (Text, optional): description of the model. Defaults to "". + api_key (Text, optional): API key of the Model. Defaults to None. + supplier (Union[Dict, Text, Supplier, int], optional): supplier of the asset. Defaults to "aiXplain". + version (Text, optional): version of the model. Defaults to "1.0". + function (Function, optional): model AI function. Defaults to None. + is_subscribed (bool, optional): Is the user subscribed. Defaults to False. + cost (Dict, optional): model price. Defaults to None. 
+ **additional_info: Any additional Model info to be saved + """ + assert function == Function.TEXT_GENERATION, "LLM only supports large language models (i.e. text generation function)" + super().__init__( + id=id, + name=name, + description=description, + supplier=supplier, + version=version, + cost=cost, + function=function, + is_subscribed=is_subscribed, + api_key=api_key, + **additional_info, + ) + self.url = config.MODELS_RUN_URL + self.backend_url = config.BACKEND_URL + + def run( + self, + data: Text, + context: Optional[Text] = None, + prompt: Optional[Text] = None, + history: Optional[List[Dict]] = None, + temperature: float = 0.001, + max_tokens: int = 128, + top_p: float = 1.0, + name: Text = "model_process", + timeout: float = 300, + parameters: Dict = {}, + wait_time: float = 0.5, + ) -> Dict: + """Synchronously running a Large Language Model (LLM) model. + + Args: + data (Union[Text, Dict]): Text to LLM or last user utterance of a conversation. + context (Optional[Text], optional): System message. Defaults to None. + prompt (Optional[Text], optional): Prompt Message which comes on the left side of the last utterance. Defaults to None. + history (Optional[List[Dict]], optional): Conversation history in OpenAI format ([{ "role": "assistant", "content": "Hello, world!"}]). Defaults to None. + temperature (float, optional): LLM temperature. Defaults to 0.001. + max_tokens (int, optional): Maximum Generation Tokens. Defaults to 128. + top_p (float, optional): Top P. Defaults to 1.0. + name (Text, optional): ID given to a call. Defaults to "model_process". + timeout (float, optional): total polling time. Defaults to 300. + parameters (Dict, optional): optional parameters to the model. Defaults to "{}". + wait_time (float, optional): wait time in seconds between polling calls. Defaults to 0.5. + + Returns: + Dict: parsed output from model + """ + start = time.time() + try: + response = self.run_async( + data, + name=name, + temperature=temperature, + max_tokens=max_tokens, + top_p=top_p, + context=context, + prompt=prompt, + history=history, + parameters=parameters, + ) + if response["status"] == "FAILED": + end = time.time() + response["elapsed_time"] = end - start + return response + poll_url = response["url"] + end = time.time() + response = self.sync_poll(poll_url, name=name, timeout=timeout, wait_time=wait_time) + return response + except Exception as e: + msg = f"Error in request for {name} - {traceback.format_exc()}" + logging.error(f"LLM Run: Error in running for {name}: {e}") + end = time.time() + return {"status": "FAILED", "error": msg, "elapsed_time": end - start} + + def run_async( + self, + data: Text, + context: Optional[Text] = None, + prompt: Optional[Text] = None, + history: Optional[List[Dict]] = None, + temperature: float = 0.001, + max_tokens: int = 128, + top_p: float = 1.0, + name: Text = "model_process", + parameters: Dict = {}, + ) -> Dict: + """Runs asynchronously a model call. + + Args: + data (Union[Text, Dict]): Text to LLM or last user utterance of a conversation. + context (Optional[Text], optional): System message. Defaults to None. + prompt (Optional[Text], optional): Prompt Message which comes on the left side of the last utterance. Defaults to None. + history (Optional[List[Dict]], optional): Conversation history in OpenAI format ([{ "role": "assistant", "content": "Hello, world!"}]). Defaults to None. + temperature (float, optional): LLM temperature. Defaults to 0.001. + max_tokens (int, optional): Maximum Generation Tokens. Defaults to 128. 
+ top_p (float, optional): Top P. Defaults to 1.0. + name (Text, optional): ID given to a call. Defaults to "model_process". + parameters (Dict, optional): optional parameters to the model. Defaults to "{}". + + Returns: + dict: polling URL in response + """ + headers = {"x-api-key": self.api_key, "Content-Type": "application/json"} + + data = FileFactory.to_link(data) + if isinstance(data, dict): + payload = data + else: + try: + payload = json.loads(data) + if isinstance(payload, dict) is False: + if isinstance(payload, int) is True or isinstance(payload, float) is True: + payload = str(payload) + payload = {"data": payload} + except Exception: + payload = {"data": data} + parameters.update( + { + "context": context, + "prompt": prompt, + "history": history, + "temperature": temperature, + "max_tokens": max_tokens, + "top_p": top_p, + } + ) + payload.update(parameters) + payload = json.dumps(payload) + + call_url = f"{self.url}/{self.id}" + r = _request_with_retry("post", call_url, headers=headers, data=payload) + logging.info(f"Model Run Async: Start service for {name} - {self.url} - {payload} - {headers}") + + resp = None + try: + resp = r.json() + logging.info(f"Result of request for {name} - {r.status_code} - {resp}") + + poll_url = resp["data"] + response = {"status": "IN_PROGRESS", "url": poll_url} + except Exception: + response = {"status": "FAILED"} + msg = f"Error in request for {name} - {traceback.format_exc()}" + logging.error(f"Model Run Async: Error in running for {name}: {resp}") + if resp is not None: + response["error"] = msg + return response diff --git a/docs/user/user_doc.md b/docs/user/user_doc.md index 400ad0d2..a7fa2178 100644 --- a/docs/user/user_doc.md +++ b/docs/user/user_doc.md @@ -57,6 +57,24 @@ poll_url = start_response["url"] ## Poll to see current job status poll_response = model.poll(poll_url) ``` + +You may also set special parameters for Large Language Models in the platform. + +```python +from aixplain.factories import ModelFactory +from aixplain.enums import Function +model = ModelFactory.list(query="GPT-4o", function=Function.TEXT_GENERATION)["results"][0] +response = model.run( + data="What is my name?", # last utterance + context="Always assist with care, respect, and truth. Respond with utmost utility yet securely. Avoid harmful, unethical, prejudiced, or negative content. Ensure replies promote fairness and positivity.", # system prompt + history=[ + { "role": "user", "content": "Hello! My name is James." }, + { "role": "assistant", "content": "Hello!" 
}
+    ], # conversation history,
+    temperature=0.7
+)
+```
+
 ### Deploying Hugging Face Large Language Models
 You can deploy your very own Hugging Face large language models on our platform using the aiXplain SDK:
 ```console
diff --git a/tests/functional/general_assets/asset_functional_test.py b/tests/functional/general_assets/asset_functional_test.py
index 93a3b297..d35a4d9a 100644
--- a/tests/functional/general_assets/asset_functional_test.py
+++ b/tests/functional/general_assets/asset_functional_test.py
@@ -3,6 +3,7 @@
 load_dotenv()
 from aixplain.factories import ModelFactory, DatasetFactory, MetricFactory, PipelineFactory
+from aixplain.modules import LLM
 from pathlib import Path
 from aixplain.enums import Function, Language, OwnershipType, Supplier, SortBy, SortOrder
 
@@ -90,7 +91,7 @@ def test_model_sort():
 def test_model_ownership():
     models = ModelFactory.list(ownership=OwnershipType.SUBSCRIBED)["results"]
     for model in models:
-        assert model.is_subscribed == True
+        assert model.is_subscribed is True
 
 
 def test_model_query():
@@ -101,6 +102,13 @@ def test_model_query():
 
 
 def test_model_deletion():
+    """Test that a model cannot be deleted."""
     model = ModelFactory.get("640b517694bf816d35a59125")
     with pytest.raises(Exception):
         model.delete()
+
+
+def test_llm_instantiation():
+    """Test that the LLM model is correctly instantiated."""
+    models = ModelFactory.list(function=Function.TEXT_GENERATION)["results"]
+    assert isinstance(models[0], LLM)
diff --git a/tests/functional/model/run_model_test.py b/tests/functional/model/run_model_test.py
new file mode 100644
index 00000000..79979357
--- /dev/null
+++ b/tests/functional/model/run_model_test.py
@@ -0,0 +1,22 @@
+__author__ = "thiagocastroferreira"
+
+import pytest
+
+from aixplain.enums import Function
+from aixplain.factories import ModelFactory
+from aixplain.modules import LLM
+
+
+@pytest.mark.parametrize("llm_model", ["Groq Llama 3 70B", "Chat GPT 3.5", "GPT-4o", "GPT 4 (32k)"])
+def test_llm_run(llm_model):
+    """Testing LLMs with history context"""
+    model = ModelFactory.list(query=llm_model, function=Function.TEXT_GENERATION)["results"][0]
+
+    assert isinstance(model, LLM)
+
+    response = model.run(
+        data="What is my name?",
+        history=[{"role": "user", "content": "Hello! My name is Thiago."}, {"role": "assistant", "content": "Hello!"}],
+    )
+    assert response["status"] == "SUCCESS"
+    assert "thiago" in response["data"].lower()
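A minimal sketch of how the `LLM` interface introduced in the patch above fits together, assuming a configured aiXplain API key and at least one accessible text-generation model; the conversation content below is illustrative only:

```python
from aixplain.enums import Function
from aixplain.factories import ModelFactory
from aixplain.modules import LLM

# Text-generation models are now instantiated as LLM rather than plain Model.
model = ModelFactory.list(function=Function.TEXT_GENERATION)["results"][0]
assert isinstance(model, LLM)

# run() forwards context/history and the decoding parameters to the service,
# then polls synchronously (sync_poll) until the job finishes or times out.
response = model.run(
    data="What is my name?",  # last user utterance
    context="You are a concise, helpful assistant.",  # system message (illustrative)
    history=[
        {"role": "user", "content": "Hello! My name is Ada."},  # illustrative history
        {"role": "assistant", "content": "Hello, Ada!"},
    ],
    temperature=0.001,  # defaults shown in the signature above
    max_tokens=128,
    top_p=1.0,
    timeout=300,
)
if response["status"] == "SUCCESS":
    print(response["data"])
```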

From c7f59ce74c033b46e2f2f190dff19254f10ec9e6 Mon Sep 17 00:00:00 2001
From: mikelam-us-aixplain <131073216+mikelam-us-aixplain@users.noreply.github.com>
Date: Wed, 5 Jun 2024 11:34:33 -0700
Subject: [PATCH 007/105] Gpus (#185)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Release. (#141): squashed history of the dev-to-test merges (#107, #109,
  #111, #113, #124, #126, #129, #135), with the recursively quoted merge
  messages deduplicated:

  * Create bounds for FineTune hyperparameters (#103): test bounds for
    hyperparameters; update finetune LLM hyperparameters; remove the option
    to use PEFT, always on use now
  * Fixing pipeline general asset test (#106)
  * Update Finetuner functional tests (#112)
  * Hf deployment test (#114, #115, #117, #118): add Hugging Face model
    deployment and onboarding status to the SDK and CLI, with functional
    tests (including bad repo ID and gated model cases), HF token handling,
    config, and user docs
  * Do not download textual URLs (#120): treat as string
  * Enable api key parameter in data asset creation (#122)
  * Update Finetuner hyperparameters (#125): change hyperparameters error
    message
  * Add new LLMs finetuner models (mistral and solar) (#128)

---------

Signed-off-by: mikelam-us-aixplain
Signed-off-by: mikelam-us
Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com>
Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com>
Co-authored-by: Thiago Castro Ferreira
Co-authored-by: mikelam-us-aixplain <131073216+mikelam-us-aixplain@users.noreply.github.com>
test (#106) * Update Finetuner functional tests (#112) --------- Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Hf deployment test (#114) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Adding HF token Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Do not download textual URLs (#120) * Do not download textual URLs * Treat as string --------- Co-authored-by: Thiago Castro Ferreira * Enable api key parameter in data asset creation (#122) Co-authored-by: Thiago Castro Ferreira * Update Finetuner hyperparameters (#125) * Update Finetuner hyperparameters * Change hyperparameters error message * Add new LLMs finetuner models (mistral and solar) (#128) * Enabling dataset ID and model ID as parameters for finetuner creation (#131) Co-authored-by: Thiago Castro Ferreira * Fix supplier representation of a model (#132) * Fix supplier representation of a model * Fixing parameter typing --------- Co-authored-by: Thiago Castro Ferreira * Fixing indentation in documentation sample code (#134) Co-authored-by: Thiago Castro Ferreira --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Co-authored-by: mikelam-us-aixplain <131073216+mikelam-us-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira Co-authored-by: Thiago Castro Ferreira * Merge dev to test (#137) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) * Update Finetuner functional tests (#112) * Hf deployment test (#115) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request 
error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Hf deployment test (#117) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain * Merge dev to test (#113) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) * Update Finetuner functional tests (#112) --------- Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Hf deployment test (#114) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding 
config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Adding HF token Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Do not download textual URLs (#120) * Do not download textual URLs * Treat as string --------- Co-authored-by: Thiago Castro Ferreira * Enable api key parameter in data asset creation (#122) Co-authored-by: Thiago Castro Ferreira * Update Finetuner hyperparameters (#125) * Update Finetuner hyperparameters * Change hyperparameters error message * Add new LLMs finetuner models (mistral and solar) (#128) * Enabling dataset ID and model ID as parameters for finetuner creation (#131) Co-authored-by: Thiago Castro Ferreira * Fix supplier representation of a model (#132) * Fix supplier representation of a model * Fixing parameter typing --------- Co-authored-by: Thiago Castro Ferreira * Fixing indentation in documentation sample code (#134) Co-authored-by: Thiago Castro Ferreira * Update FineTune unit and functional tests (#136) --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Co-authored-by: mikelam-us-aixplain <131073216+mikelam-us-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira Co-authored-by: Thiago Castro Ferreira --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Co-authored-by: mikelam-us-aixplain <131073216+mikelam-us-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira Co-authored-by: Thiago Castro Ferreira * Merge to prod. 
(#152) * Merge dev to test (#107) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) --------- Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Development to Test (#109) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) --------- Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> * Merge to test (#111) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) --------- Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Merge dev to test (#113) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) * Update Finetuner functional tests (#112) --------- Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Hf deployment test (#114) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Hf deployment test (#118) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * 
Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain * Update Finetuner functional tests (#112) * Hf deployment test (#115) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Adding HF token Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Merge to test (#124) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) * Update Finetuner functional tests (#112) * Hf deployment test (#115) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * 
Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Hf deployment test (#117) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain * Merge dev to test (#113) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) * Update Finetuner functional tests (#112) --------- Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Hf deployment test (#114) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Adding HF token Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Do not download textual URLs (#120) * Do not download textual URLs * Treat as string --------- Co-authored-by: Thiago Castro Ferreira * Enable api key parameter in 
data asset creation (#122) Co-authored-by: Thiago Castro Ferreira --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Co-authored-by: mikelam-us-aixplain <131073216+mikelam-us-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira * Merge dev to test (#126) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) * Update Finetuner functional tests (#112) * Hf deployment test (#115) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Hf deployment test (#117) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain * Merge dev to test (#113) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) * Update Finetuner functional tests (#112) --------- Co-authored-by: Thiago Castro Ferreira 
<85182544+thiago-aixplain@users.noreply.github.com> * Hf deployment test (#114) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Adding HF token Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Do not download textual URLs (#120) * Do not download textual URLs * Treat as string --------- Co-authored-by: Thiago Castro Ferreira * Enable api key parameter in data asset creation (#122) Co-authored-by: Thiago Castro Ferreira * Update Finetuner hyperparameters (#125) * Update Finetuner hyperparameters * Change hyperparameters error message --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Co-authored-by: mikelam-us-aixplain <131073216+mikelam-us-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira * Merge dev to test (#129) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) * Update Finetuner functional tests (#112) * Hf deployment test (#115) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing 
Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Hf deployment test (#117) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain * Merge dev to test (#113) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) * Update Finetuner functional tests (#112) --------- Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Hf deployment test (#114) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Adding HF token Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Do not download textual URLs (#120) * Do not download textual URLs * Treat as string --------- Co-authored-by: Thiago Castro 
Ferreira * Enable api key parameter in data asset creation (#122) Co-authored-by: Thiago Castro Ferreira * Update Finetuner hyperparameters (#125) * Update Finetuner hyperparameters * Change hyperparameters error message * Add new LLMs finetuner models (mistral and solar) (#128) --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Co-authored-by: mikelam-us-aixplain <131073216+mikelam-us-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira * Merge to test (#135) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) * Update Finetuner functional tests (#112) * Hf deployment test (#115) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Hf deployment test (#117) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain * Merge dev to test (#113) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset 
test (#106) * Update Finetuner functional tests (#112) --------- Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Hf deployment test (#114) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Adding HF token Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Do not download textual URLs (#120) * Do not download textual URLs * Treat as string --------- Co-authored-by: Thiago Castro Ferreira * Enable api key parameter in data asset creation (#122) Co-authored-by: Thiago Castro Ferreira * Update Finetuner hyperparameters (#125) * Update Finetuner hyperparameters * Change hyperparameters error message * Add new LLMs finetuner models (mistral and solar) (#128) * Enabling dataset ID and model ID as parameters for finetuner creation (#131) Co-authored-by: Thiago Castro Ferreira * Fix supplier representation of a model (#132) * Fix supplier representation of a model * Fixing parameter typing --------- Co-authored-by: Thiago Castro Ferreira * Fixing indentation in documentation sample code (#134) Co-authored-by: Thiago Castro Ferreira --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Co-authored-by: mikelam-us-aixplain <131073216+mikelam-us-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira Co-authored-by: Thiago Castro Ferreira * Merge dev to test (#137) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) * Update Finetuner functional tests (#112) * Hf deployment test (#115) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request 
error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Hf deployment test (#117) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain * Merge dev to test (#113) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) * Update Finetuner functional tests (#112) --------- Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Hf deployment test (#114) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding 
config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Adding HF token Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Do not download textual URLs (#120) * Do not download textual URLs * Treat as string --------- Co-authored-by: Thiago Castro Ferreira * Enable api key parameter in data asset creation (#122) Co-authored-by: Thiago Castro Ferreira * Update Finetuner hyperparameters (#125) * Update Finetuner hyperparameters * Change hyperparameters error message * Add new LLMs finetuner models (mistral and solar) (#128) * Enabling dataset ID and model ID as parameters for finetuner creation (#131) Co-authored-by: Thiago Castro Ferreira * Fix supplier representation of a model (#132) * Fix supplier representation of a model * Fixing parameter typing --------- Co-authored-by: Thiago Castro Ferreira * Fixing indentation in documentation sample code (#134) Co-authored-by: Thiago Castro Ferreira * Update FineTune unit and functional tests (#136) --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Co-authored-by: mikelam-us-aixplain <131073216+mikelam-us-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira Co-authored-by: Thiago Castro Ferreira * Merge to test (#142) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) * Update Finetuner functional tests (#112) * Hf deployment test (#115) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Hf deployment test (#117) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: 
mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain * Merge dev to test (#113) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) * Update Finetuner functional tests (#112) --------- Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Hf deployment test (#114) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Adding HF token Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Do not download textual URLs (#120) * Do not download textual URLs * Treat as string --------- Co-authored-by: Thiago Castro Ferreira * Enable api key parameter in data asset creation (#122) Co-authored-by: Thiago Castro Ferreira * Update Finetuner hyperparameters (#125) * Update Finetuner hyperparameters * Change hyperparameters error message * Add new LLMs finetuner models (mistral and solar) (#128) * Enabling dataset ID and model ID as parameters for finetuner creation (#131) Co-authored-by: Thiago Castro Ferreira * Fix supplier representation of a model (#132) * Fix supplier representation of a model * Fixing parameter typing --------- Co-authored-by: Thiago Castro Ferreira * Fixing 
indentation in documentation sample code (#134) Co-authored-by: Thiago Castro Ferreira * Update FineTune unit and functional tests (#136) * Click fix (#140) * Merge to prod (#119) * Merge dev to test (#107) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) --------- Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Development to Test (#109) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) --------- Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> * Merge to test (#111) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) --------- Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Merge dev to test (#113) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) * Update Finetuner functional tests (#112) --------- Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Hf deployment test (#114) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Hf deployment test (#118) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out 
unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain * Update Finetuner functional tests (#112) * Hf deployment test (#115) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Adding HF token Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.githu… * Updated onboarding docs (#155) * Updated onboarding docs Signed-off-by: mikelam-us-aixplain * under -> undergoing Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain * Added GPU endpoint Signed-off-by: mikelam-us-aixplain * Added host machine Signed-off-by: mikelam-us-aixplain * Adding CLI Signed-off-by: mikelam-us-aixplain * Updating create_asset_repo Signed-off-by: Michael Lam * Added CLI for creating asset repo Signed-off-by: mikelam-us-aixplain * Updating self-onboarding documentation Signed-off-by: Michael Lam * Updating user doc Signed-off-by: Michael Lam * cli change Signed-off-by: Michael Lam * Minor naming/documentation corrections Signed-off-by: mikelam-us-aixplain * Updated flag Signed-off-by: mikelam-us-aixplain * Updated verbose flag Signed-off-by: mikelam-us-aixplain * Hugging Face deployer is baaaacckk
Signed-off-by: mikelam-us-aixplain * Added revision argument to Hugging Face deployment Signed-off-by: mikelam-us-aixplain * Updated documentation Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Signed-off-by: Michael Lam Co-authored-by: ikxplain <88332269+ikxplain@users.noreply.github.com> Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira Co-authored-by: Thiago Castro Ferreira Co-authored-by: Shreyas Sharma <85180538+shreyasXplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira Co-authored-by: Thiago Castro Ferreira --- aixplain/cli_groups.py | 5 +- aixplain/factories/cli/model_factory_cli.py | 52 +++++++--- aixplain/factories/model_factory.py | 93 ++++++++++++------ aixplain/modules/metric.py | 1 - aixplain/modules/model/__init__.py | 4 - docs/user/user_doc.md | 94 ++++++++++--------- .../data/finetune_test_cost_estimation.json | 2 +- .../finetune/data/finetune_test_end2end.json | 4 +- .../finetune/finetune_functional_test.py | 2 +- 9 files changed, 157 insertions(+), 100 deletions(-) diff --git a/aixplain/cli_groups.py b/aixplain/cli_groups.py index c5f05826..ea5e28be 100644 --- a/aixplain/cli_groups.py +++ b/aixplain/cli_groups.py @@ -21,7 +21,7 @@ CLI Runner """ import click -from aixplain.factories.cli.model_factory_cli import list_host_machines, list_functions, create_asset_repo, asset_repo_login, onboard_model, deploy_huggingface_model, get_huggingface_model_status +from aixplain.factories.cli.model_factory_cli import list_host_machines, list_functions, create_asset_repo, asset_repo_login, onboard_model, deploy_huggingface_model, get_huggingface_model_status, list_gpus @click.group('cli') def cli(): @@ -51,6 +51,7 @@ def onboard(): create.add_command(create_asset_repo) list.add_command(list_host_machines) list.add_command(list_functions) +list.add_command(list_gpus) get.add_command(asset_repo_login) get.add_command(get_huggingface_model_status) onboard.add_command(onboard_model) @@ -58,4 +59,4 @@ def onboard(): def run_cli(): - cli() \ No newline at end of file + cli() diff --git a/aixplain/factories/cli/model_factory_cli.py b/aixplain/factories/cli/model_factory_cli.py index 264fadd9..b83d61cc 100644 --- a/aixplain/factories/cli/model_factory_cli.py +++ b/aixplain/factories/cli/model_factory_cli.py @@ -44,7 +44,7 @@ def list_host_machines(api_key: Optional[Text] = None) -> None: click.echo(ret_val_yaml) @click.command("functions") -@click.option("--verbose", default=False, +@click.option("--verbose", is_flag=True, help="List all function details, False by default.") @click.option("--api-key", default=None, help="TEAM_API_KEY if not already set in environment.") @@ -62,21 +62,37 @@ def list_functions(verbose: bool, api_key: Optional[Text] = None) -> None: ret_val_yaml = yaml.dump(ret_val) click.echo(ret_val_yaml) +@click.command("gpus") +@click.option("--api-key", default=None, + help="TEAM_API_KEY if not already set in environment.") +def list_gpus(api_key: Optional[Text] = None) -> None: + """CLI wrapper function for the LIST_GPUS function in ModelFactory. + + Args: + api_key (Text, optional): Team API key. Defaults to None. 
+ Returns: + None + """ + ret_val = ModelFactory.list_gpus(api_key) + ret_val_yaml = yaml.dump(ret_val) + click.echo(ret_val_yaml) + @click.command("image-repo") @click.option("--name", help="Model name.") -@click.option("--hosting-machine", - help="Hosting machine code obtained from LIST_HOSTS.") -@click.option("--version", help="Model version.") @click.option("--description", help="Description of model.") @click.option("--function", help="Function name obtained from LIST_FUNCTIONS.") @click.option("--source-language", default="en", help="Model source language in 2-character 639-1 code or 3-character 639-3 code.") +@click.option("--input-modality", help="Input type (text, video, image, etc.)") +@click.option("--output-modality", help="Output type (text, video, image, etc.)") +@click.option("--documentation-url", default="", help="Link to model documentation.") @click.option("--api-key", default=None, help="TEAM_API_KEY if not already set in environment.") -def create_asset_repo(name: Text, hosting_machine: Text, version: Text, - description: Text, function: Text, - source_language: Text, - api_key: Optional[Text] = None) -> None: +def create_asset_repo(name: Text, description: Text, function: Text, + source_language: Text, input_modality: Text, + output_modality: Text, + documentation_url: Optional[Text] = "", + api_key: Optional[Text] = None) -> None: """CLI wrapper function for the CREATE_ASSET_REPO function in ModelFactory. Args: @@ -93,9 +109,10 @@ def create_asset_repo(name: Text, hosting_machine: Text, version: Text, Returns: None """ - ret_val = ModelFactory.create_asset_repo(name, hosting_machine, version, - description, function, - source_language, api_key) + ret_val = ModelFactory.create_asset_repo(name, description, function, + source_language, input_modality, + output_modality, documentation_url, + api_key) ret_val_yaml = yaml.dump(ret_val) click.echo(ret_val_yaml) @@ -119,8 +136,10 @@ def asset_repo_login(api_key: Optional[Text] = None) -> None: @click.option("--model-id", help="Model ID from CREATE_IMAGE_REPO.") @click.option("--image-tag", help="The tag of the image that you would like hosted.") @click.option("--image-hash", help="The hash of the image you would like onboarded.") +@click.option("--host-machine", default="", help="The machine on which to host the model.") @click.option("--api-key", default=None, help="TEAM_API_KEY if not already set in environment.") def onboard_model(model_id: Text, image_tag: Text, image_hash: Text, + host_machine: Optional[Text] = "", api_key: Optional[Text] = None) -> None: """CLI wrapper function for the ONBOARD_MODEL function in ModelFactory. 
@@ -132,17 +151,20 @@ def onboard_model(model_id: Text, image_tag: Text, image_hash: Text, Returns: None """ - ret_val = ModelFactory.onboard_model(model_id, image_tag, image_hash, api_key) + ret_val = ModelFactory.onboard_model(model_id, image_tag, image_hash, + host_machine, api_key) ret_val_yaml = yaml.dump(ret_val) click.echo(ret_val_yaml) @click.command("hf-model") @click.option("--name", help="User-defined name for Hugging Face model.") @click.option("--hf-repo-id", help="Repository ID from Hugging Face in {supplier}/{model name} form.") -@click.option("--hf-token", help="Hugging Face token used to authenticate to this model.") +@click.option("--revision", default="", help="Commit hash of repository.") +@click.option("--hf-token", default=None, help="Hugging Face token used to authenticate to this model.") @click.option("--api-key", default=None, help="TEAM_API_KEY if not already set in environment.") def deploy_huggingface_model(name: Text, hf_repo_id: Text, hf_token: Optional[Text] = None, + revision: Optional[Text] = None, api_key: Optional[Text] = None) -> None: """CLI wrapper function for the DEPLOY_HUGGINGFACE_MODEL function in ModelFactory. @@ -153,7 +175,7 @@ def deploy_huggingface_model(name: Text, hf_repo_id: Text, Returns: None """ - ret_val = ModelFactory.deploy_huggingface_model(name, hf_repo_id, hf_token, api_key) + ret_val = ModelFactory.deploy_huggingface_model(name, hf_repo_id, revision, hf_token, api_key) ret_val_yaml = yaml.dump(ret_val) click.echo(ret_val_yaml) @@ -172,4 +194,4 @@ def get_huggingface_model_status(model_id: Text, api_key: Optional[Text] = None) """ ret_val = ModelFactory.get_huggingface_model_status(model_id, api_key) ret_val_yaml = yaml.dump(ret_val) - click.echo(ret_val_yaml) \ No newline at end of file + click.echo(ret_val_yaml) diff --git a/aixplain/factories/model_factory.py b/aixplain/factories/model_factory.py index 221fd94d..0fb845f1 100644 --- a/aixplain/factories/model_factory.py +++ b/aixplain/factories/model_factory.py @@ -270,6 +270,25 @@ def list_host_machines(cls, api_key: Optional[Text] = None) -> List[Dict]: for dictionary in response_dicts: del dictionary["id"] return response_dicts + + @classmethod + def list_gpus(cls, api_key: Optional[Text] = None) -> List[List[Text]]: + """List GPU names on which you can host your language model. + + Args: + api_key (Text, optional): Team API key. Defaults to None. + + Returns: + List[List[Text]]: List of all available GPUs and their prices. + """ + gpu_url = urljoin(config.BACKEND_URL, "sdk/model-onboarding/gpus") + if api_key: + headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"} + else: + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + response = _request_with_retry("get", gpu_url, headers=headers) + response_list = json.loads(response.text) + return response_list @classmethod def list_functions(cls, verbose: Optional[bool] = False, api_key: Optional[Text] = None) -> List[Dict]: @@ -310,12 +329,13 @@ def list_functions(cls, verbose: Optional[bool] = False, api_key: Optional[Text] def create_asset_repo( cls, name: Text, - hosting_machine: Text, - version: Text, description: Text, function: Text, source_language: Text, - api_key: Optional[Text] = None, + input_modality: Text, + output_modality: Text, + documentation_url: Optional[Text] = "", + api_key: Optional[Text] = None ) -> Dict: """Creates an image repository for this model and registers it in the platform backend. 
@@ -342,27 +362,36 @@ def create_asset_repo( function_id = function_dict["id"] if function_id is None: raise Exception("Invalid function name") - create_url = urljoin(config.BACKEND_URL, "sdk/models/register") + create_url = urljoin(config.BACKEND_URL, f"sdk/models/onboard") logging.debug(f"URL: {create_url}") if api_key: headers = {"x-api-key": f"{api_key}", "Content-Type": "application/json"} else: headers = {"x-api-key": f"{config.TEAM_API_KEY}", "Content-Type": "application/json"} - always_on = False - is_async = False # Hard-coded to False for first release + payload = { - "name": name, - "hostingMachine": hosting_machine, - "alwaysOn": always_on, - "version": version, - "description": description, - "function": function_id, - "isAsync": is_async, - "sourceLanguage": source_language, + "model": { + "name": name, + "description": description, + "connectionType": [ + "synchronous" + ], + "function": function_id, + "modalities": [ + f"{input_modality}-{output_modality}" + ], + "documentationUrl": documentation_url, + "sourceLanguage": source_language + }, + "source": "aixplain-ecr", + "onboardingParams": { + } } - payload = json.dumps(payload) logging.debug(f"Body: {str(payload)}") - response = _request_with_retry("post", create_url, headers=headers, data=payload) + response = _request_with_retry("post", create_url, headers=headers, json=payload) + + assert response.status_code == 201 + return response.json() @classmethod @@ -379,20 +408,23 @@ def asset_repo_login(cls, api_key: Optional[Text] = None) -> Dict: login_url = urljoin(config.BACKEND_URL, "sdk/ecr/login") logging.debug(f"URL: {login_url}") if api_key: - headers = {"x-api-key": f"{api_key}", "Content-Type": "application/json"} + headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"} else: - headers = {"x-api-key": f"{config.TEAM_API_KEY}", "Content-Type": "application/json"} + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} response = _request_with_retry("post", login_url, headers=headers) + print(f"Response: {response}") response_dict = json.loads(response.text) return response_dict @classmethod - def onboard_model(cls, model_id: Text, image_tag: Text, image_hash: Text, api_key: Optional[Text] = None) -> Dict: + def onboard_model(cls, model_id: Text, image_tag: Text, image_hash: Text, host_machine: Optional[Text] = "", api_key: Optional[Text] = None) -> Dict: """Onboard a model after its image has been pushed to ECR. Args: model_id (Text): Model ID obtained from CREATE_ASSET_REPO. image_tag (Text): Image tag to be onboarded. + image_hash (Text): Image digest. + host_machine (Text, optional): Machine on which to host model. api_key (Text, optional): Team API key. Defaults to None. Returns: Dict: Backend response @@ -403,18 +435,18 @@ def onboard_model(cls, model_id: Text, image_tag: Text, image_hash: Text, api_ke headers = {"x-api-key": f"{api_key}", "Content-Type": "application/json"} else: headers = {"x-api-key": f"{config.TEAM_API_KEY}", "Content-Type": "application/json"} - payload = {"image": image_tag, "sha": image_hash} - payload = json.dumps(payload) + payload = {"image": image_tag, "sha": image_hash, "hostMachine": host_machine} logging.debug(f"Body: {str(payload)}") - response = _request_with_retry("post", onboard_url, headers=headers, data=payload) - message = "Your onboarding request has been submitted to an aiXplain specialist for finalization. We will notify you when the process is completed." 
- logging.info(message)
+ response = _request_with_retry("post", onboard_url, headers=headers, json=payload)
+ if response.status_code == 201:
+ message = "Your onboarding request has been submitted to an aiXplain specialist for finalization. We will notify you when the process is completed."
+ logging.info(message)
+ else:
+ message = "An error has occurred. Please make sure your model_id is valid and your host_machine, if set, is a valid option from the LIST_GPUS function."
+ logging.error(message)
 return response
 @classmethod
- def deploy_huggingface_model(
- cls, name: Text, hf_repo_id: Text, hf_token: Optional[Text] = "", api_key: Optional[Text] = None
- ) -> Dict:
+ def deploy_huggingface_model(cls, name: Text, hf_repo_id: Text, revision: Optional[Text] = "", hf_token: Optional[Text] = "", api_key: Optional[Text] = None) -> Dict:
 """Onboards and deploys a Hugging Face large language model.
 Args:
@@ -441,7 +473,12 @@ def deploy_huggingface_model(
 "sourceLanguage": "en",
 },
 "source": "huggingface",
- "onboardingParams": {"hf_model_name": model_name, "hf_supplier": supplier, "hf_token": hf_token},
+ "onboardingParams": {
+ "hf_supplier": supplier,
+ "hf_model_name": model_name,
+ "hf_token": hf_token,
+ "revision": revision
+ }
 }
 response = _request_with_retry("post", deploy_url, headers=headers, json=body)
 logging.debug(response.text)
diff --git a/aixplain/modules/metric.py b/aixplain/modules/metric.py
index d591772b..16bf4541 100644
--- a/aixplain/modules/metric.py
+++ b/aixplain/modules/metric.py
@@ -97,7 +97,6 @@ def run(
 reference (Optional[Union[str, List[str]]], optional): Can give a single reference or a list of references for metric calculation. Defaults to None.
 """
 from aixplain.factories.model_factory import ModelFactory
-
 model = ModelFactory.get(self.id)
 payload = {
 "function": self.function,
diff --git a/aixplain/modules/model/__init__.py b/aixplain/modules/model/__init__.py
index 12c96977..06811332 100644
--- a/aixplain/modules/model/__init__.py
+++ b/aixplain/modules/model/__init__.py
@@ -267,7 +267,6 @@ def check_finetune_status(self, after_epoch: Optional[int] = None):
 """
 from aixplain.enums.asset_status import AssetStatus
 from aixplain.modules.finetune.status import FinetuneStatus
-
 headers = {"x-api-key": self.api_key, "Content-Type": "application/json"}
 resp = None
 try:
@@ -278,7 +277,6 @@ def check_finetune_status(self, after_epoch: Optional[int] = None):
 finetune_status = AssetStatus(resp["finetuneStatus"])
 model_status = AssetStatus(resp["modelStatus"])
 logs = sorted(resp["logs"], key=lambda x: float(x["epoch"]))
-
 target_epoch = None
 if after_epoch is not None:
 logs = [log for log in logs if float(log["epoch"]) > after_epoch]
@@ -286,7 +284,6 @@ def check_finetune_status(self, after_epoch: Optional[int] = None):
 target_epoch = float(logs[0]["epoch"])
 elif len(logs) > 0:
 target_epoch = float(logs[-1]["epoch"])
-
 if target_epoch is not None:
 log = None
 for log_ in logs:
@@ -298,7 +295,6 @@ def check_finetune_status(self, after_epoch: Optional[int] = None):
 log["trainLoss"] = log_["trainLoss"]
 if log_["evalLoss"] is not None:
 log["evalLoss"] = log_["evalLoss"]
-
 status = FinetuneStatus(
 status=finetune_status,
 model_status=model_status,
diff --git a/docs/user/user_doc.md b/docs/user/user_doc.md
index a7fa2178..4466e121 100644
--- a/docs/user/user_doc.md
+++ b/docs/user/user_doc.md
@@ -76,75 +76,66 @@ response = model.run(
 ```
 ### Deploying Hugging Face Large Language Models
+
 You can deploy your very own Hugging Face large language models on our platform using the aiXplain SDK:
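Under the hood the CLI calls `ModelFactory.deploy_huggingface_model` and `ModelFactory.get_huggingface_model_status`, so the same flow can be scripted. A minimal Python sketch (it assumes `TEAM_API_KEY` is exported; the display name, repository ID, token, and the key under which the backend returns the new model's ID are illustrative placeholders):

```python
from aixplain.factories.model_factory import ModelFactory

# Submit the Hugging Face repository for on-boarding. hf_token is only
# needed for gated or private repositories; both values are placeholders.
response = ModelFactory.deploy_huggingface_model(
    name="my-llm",
    hf_repo_id="mistralai/Mistral-7B-v0.1",
    hf_token="<HF_TOKEN>",
)
model_id = response["id"]  # assumed key holding the new model's ID

# Poll the on-boarding status; the process usually takes 5 to 15 minutes.
status = ModelFactory.get_huggingface_model_status(model_id)
print(status)
```

From the command line, the equivalent is: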
```console
-$ aixplain onboard hf-model --name --hf-repo-id --hf-token [--api-key ]
+$ aixplain onboard hf-model --name --hf-repo-id --revision --hf-token [--api-key ]
```
This command will return your model's ID. The on-boarding process will take 5 to 15 minutes, during which you can check the on-boarding status by running the following:
```console
$ aixplain get hf-model-status --model-id [--api-key ]
```
-Once the on-boarding process has completed, you can use this newly-deployed large language model just like any other model on our platform. Note that our platform currently only supports language models up 7 billion parameters in size (~30 GB), so any attempts to deploy larger models will result in an error message.
+Once the on-boarding process has completed, you can use this newly-deployed large language model just like any other private model on our platform. Note that our platform currently only supports language models up to 7 billion parameters in size (~30 GB), so any attempts to deploy larger models will result in an error message.
 ### Uploading Models
+## Uploading Models
 In addition to exploring and running models, the aiXplain SDK allows you to upload your own models to the aiXplain platform. This requires a working model image in line with the template specified [here](https://github.com/aixplain/model-interfaces/blob/main/docs/user/model_setup.md). [These](https://github.com/aixplain/model-interfaces/tree/main) are the interfaces with which you will be working. You will also be required to have an aiXplain account as well as a TEAM_API_KEY which should be set either as an environment variable or passed into each of the following functions.
-First, choose a hosting machine appropriate for your model. Note down the host machines "code". You can list the available hosting machines' specifications by running the following:
-```console
-$ aixplain list hosts [--api-key ]
-- code: aix-2c-8g-od
- cores: 2
- hourlyCost: 0.12
- memory: 8
- type: on-demand
-- code: aix-2c-8g
- cores: 2
- hourlyCost: 0.096
- memory: 8
- type: always-on
- ...
-```
 Note: For any of the CLI commands, running `aixplain [verb] [resource] --help` will display a description of each argument that should be passed into that command. The `api-key` parameter is optional and is only used if the environment variable isn't set or you would like to override the existing environment variable.
 Find a supported function type that best describes your model's purpose. Note down the function's ID.
```console
-$ aixplain list functions [--verbose ] [--api-key ]
-filteredFrom: 55
+aixplain list functions [--verbose] [--api-key ]
+filteredFrom: 63
 items:
-- name: Language Identification
-- name: OCR
-- name: Image Label Detection
-- name: Video Forced Alignment
-- name: Offensive Language Identification
-- name: Audio Forced Alignment
-- name: Video Generation
-- name: Split On Silence
-- name: Referenceless Audio Generation Metric
-- name: Audio Generation Metric
-- name: Speaker Diarization Video
-- name: Referenceless Text Generation Metric Default
+- modalities:
+  - text-number
+  name: Object Detection
+- modalities:
+  - text-label
+  name: Language Identification
+- modalities:
+  - image-text
+  - document-text
+  name: OCR
+- modalities:
+  - image-label
+  name: Image Label Detection
+- modalities:
+  - image-text
+  name: Image Captioning
 ...
```
-`verbose` is optional and is set to False by default, meaning only the function names are listed. Setting this to True will additionally list the function ID, output, and params. Again, `api-key` is optional.
+`verbose` is optional and is set to False by default. Again, `api-key` is optional.
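The same listing is available programmatically; this is roughly what the CLI wrapper does (a sketch, assuming `TEAM_API_KEY` is set in the environment):

```python
import yaml

from aixplain.factories.model_factory import ModelFactory

# The same call `aixplain list functions` makes, dumped as YAML like the CLI does.
ret_val = ModelFactory.list_functions(verbose=True)
print(yaml.dump(ret_val))
```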
Once you have chosen a suitable host machine and function, register your model and create an image repository:
```console
-$ aixplain create image-repo --name --hosting-machine --version --description --function --source-language [--api-key ]
+aixplain create image-repo --name --description --function --source-language --input-modality --output-modality --documentation-url [--api-key ]
{
"repoName": ,
"modelId": 
}
```
-`name` is your model's name. `hosting-machine` should include the code of the hosting machine you would like to use. The `version` field should be set to your model's version number. `description` should hold a short summary of your model's purpose. Specify the function name most closely describe your model's purpose in the `function` field. Finally, `source-language` should contain your model's source language.
+`name` is your model's name. `description` should hold a short summary of your model's purpose. Specify the function name that most closely describes your model's purpose in the `function` field. Finally, `source-language` should contain your model's source language.
 This returns a model ID and a repository name. Next, obtain login credentials for the newly created repository:
```console
-$ aixplain get image-repo-login [--api-key ]
+aixplain get image-repo-login [--api-key ]
{
"username": ,
"password": ,
"registry": 
}
```
-These credentials are valid for 12 hours, after which you much again log in for a fresh set of valid credentials. If you are using Docker, you can use these credentials to log in with the following:
+These credentials are valid for 12 hours, after which you must again log in for a fresh set of valid credentials. If you are using Docker, you can use these credentials to log in with the following:
```console
docker login --username $USERNAME --password $PASSWORD 535945872701.dkr.ecr.us-east-1.amazonaws.com
```
 You must first build your image using the following:
```console
-$ docker build . -t 535945872701.dkr.ecr.us-east-1.amazonaws.com/:
+docker build . -t $REGISTRY/$REPO_NAME:
```
-where the `` is that returned by `aixplain create image-repo` and `` is some sort of descriptor (usually version number) for your specific model.
+where `` is some sort of descriptor (usually a version tag like v0.0.1) for your specific model.

-Next, tag your image to match the registry and repository name given in the previous steps. If you are using Docker, this would look like the following:
+Push the newly tagged image to the corresponding repository:
```console
-$ docker tag {$REGISTRY}/{$REPO_NAME}:
+$ docker push $REGISTRY/$REPO_NAME:
```
+
+Once this is done, onboard the model:
```console
-$ docker push {$REGISTRY}/{$REPO_NAME}:
+$ aixplain onboard model --model-id --image-tag --image-hash --host-machine [--api-key ]
```
+`model-id` should be the model ID returned by the image-create-repo function used earlier. `image-tag` should be set to whatever string you used to tag your model image. The image sha256 hash can be obtained by running `docker images --digests`. Choose the hash corresponding to the image you would like onboarded. `host-machine` should contain the machine code on which to host the model.
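Stitched together, the Python side of this flow might look like the sketch below. It is illustrative only: the model name, function name, image tag, sha256 digest, and GPU code are placeholders, and the docker login/build/push steps still happen outside Python.

```python
from aixplain.factories.model_factory import ModelFactory

# Register the model and create its image repository.
repo = ModelFactory.create_asset_repo(
    name="my-model",
    description="Short summary of the model's purpose.",
    function="Translation",      # a name from `aixplain list functions` (illustrative)
    source_language="en",
    input_modality="text",
    output_modality="text",
)
model_id = repo["modelId"]       # keys as documented above

# Obtain 12-hour ECR credentials, then docker login/build/push outside Python.
creds = ModelFactory.asset_repo_login()
print(creds["username"], creds["registry"])

# Finally, onboard the pushed image on a chosen GPU host.
ModelFactory.onboard_model(
    model_id,
    image_tag="v0.0.1",
    image_hash="<sha256-digest>",
    host_machine="nvidia-t4-1",  # a code from `aixplain list gpus`
)
```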
A list of all the available host machines can be obtained via `aixplain list gpus` as follows:

Note down the host machine's "code":
```console
aixplain list gpus [--api-key ]
- - nvidia-t4-1
  - 'Price: 0.752'
  - 'Units: $/hr'
- - nvidia-a10g-1
  - 'Price: 1.006'
  - 'Units: $/hr'
- - nvidia-a10g-4
  - 'Price: 5.672'
  - 'Units: $/hr'
  ...
```
-`model-id` should be the model ID returned by the image-create-repo function used earlier. `image-tag` should be set to whatever string you used to tag your model image. The image sha256 hash can be obtained by running `docker images --digests`. Choose the hash corresponding to the image you would like onboarded.
-This will send an email to an aiXplain associate to finalize the onboarding process.
+This will send an email to an aiXplain associate to finalize the onboarding process.
 ## Pipelines
 [Design](https://aixplain.com/platform/studio/) is aiXplain’s no-code AI pipeline builder tool that accelerates AI development by providing a seamless experience to build complex AI systems and deploy them within minutes. You can visit our platform and design your own custom pipeline [here](https://platform.aixplain.com/studio).
diff --git a/tests/functional/finetune/data/finetune_test_cost_estimation.json b/tests/functional/finetune/data/finetune_test_cost_estimation.json
index 80f4d331..44707255 100644
--- a/tests/functional/finetune/data/finetune_test_cost_estimation.json
+++ b/tests/functional/finetune/data/finetune_test_cost_estimation.json
@@ -9,4 +9,4 @@
 {"model_name": "MPT 7b storywriter", "model_id": "6551a870bf42e6037ab109db", "dataset_name": "Test text generation dataset"},
 {"model_name": "BloomZ 7b", "model_id": "6551ab17bf42e6037ab109e0", "dataset_name": "Test text generation dataset"},
 {"model_name": "BloomZ 7b MT", "model_id": "656e80147ca71e334752d5a3", "dataset_name": "Test text generation dataset"}
-] \ No newline at end of file
+]
diff --git a/tests/functional/finetune/data/finetune_test_end2end.json b/tests/functional/finetune/data/finetune_test_end2end.json
index 80768de9..f744f0e6 100644
--- a/tests/functional/finetune/data/finetune_test_end2end.json
+++ b/tests/functional/finetune/data/finetune_test_end2end.json
@@ -10,7 +10,7 @@
 {
 "model_name": "aiR",
 "model_id": "6499cc946eb5633de15d82a1",
-    "dataset_name": "Test search dataset",
+    "dataset_name": "Test search dataset metadata",
 "inference_data": "Hello!",
 "required_dev": false,
 "search_metadata": false
@@ -23,4 +23,4 @@
 "required_dev": false,
 "search_metadata": false
 }
-] \ No newline at end of file
+]
diff --git a/tests/functional/finetune/finetune_functional_test.py b/tests/functional/finetune/finetune_functional_test.py
index ffa9ad5a..7b45613c 100644
--- a/tests/functional/finetune/finetune_functional_test.py
+++ b/tests/functional/finetune/finetune_functional_test.py
@@ -130,4 +130,4 @@ def test_prompt_validator(validate_prompt_input_map):
 finetune = FinetuneFactory.create(
 str(uuid.uuid4()), dataset_list, model, prompt_template=validate_prompt_input_map["prompt_template"]
 )
-    assert exc_info.type is AssertionError \ No newline at end of file
+    assert exc_info.type is AssertionError

From 16eb2e116f190bc8380312487327007570a0f4d1 Mon Sep 17 00:00:00 2001
From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com>
Date: Fri, 7 Jun 2024 13:03:06 -0300
Subject: [PATCH 008/105] Create and get Pipelines with api key as input parameter (#187)

Co-authored-by: Thiago
Castro Ferreira --- aixplain/factories/pipeline_factory.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/aixplain/factories/pipeline_factory.py b/aixplain/factories/pipeline_factory.py index 404a5556..4ebdc439 100644 --- a/aixplain/factories/pipeline_factory.py +++ b/aixplain/factories/pipeline_factory.py @@ -73,7 +73,9 @@ def get(cls, pipeline_id: Text, api_key: Optional[Text] = None) -> Pipeline: resp = None try: url = urljoin(cls.backend_url, f"sdk/pipelines/{pipeline_id}") - if cls.aixplain_key != "": + if api_key is not None: + headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"} + elif cls.aixplain_key != "": headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} else: headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} @@ -86,7 +88,7 @@ def get(cls, pipeline_id: Text, api_key: Optional[Text] = None) -> Pipeline: resp["api_key"] = api_key pipeline = cls.__from_response(resp) return pipeline - except Exception as e: + except Exception: status_code = 400 if resp is not None and "statusCode" in resp: status_code = resp["statusCode"] @@ -172,7 +174,7 @@ def list( else: headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} - assert 0 < page_size <= 100, f"Pipeline List Error: Page size must be greater than 0 and not exceed 100." + assert 0 < page_size <= 100, "Pipeline List Error: Page size must be greater than 0 and not exceed 100." payload = { "pageSize": page_size, "pageNumber": page_number, @@ -223,13 +225,16 @@ def list( return {"results": pipelines, "page_total": page_total, "page_number": page_number, "total": total} @classmethod - def create(cls, name: Text, pipeline: Union[Text, Dict], status: Text = "draft") -> Pipeline: + def create( + cls, name: Text, pipeline: Union[Text, Dict], status: Text = "draft", api_key: Optional[Text] = None + ) -> Pipeline: """Pipeline Creation Args: name (Text): Pipeline Name pipeline (Union[Text, Dict]): Pipeline as a Python dictionary or in a JSON file status (Text, optional): Status of the pipeline. Currently only draft pipelines can be saved. Defaults to "draft". + api_key (Optional[Text], optional): _description_. Defaults to None. 
Raises: Exception: Currently just the creation of draft pipelines are supported @@ -250,11 +255,12 @@ def create(cls, name: Text, pipeline: Union[Text, Dict], status: Text = "draft") # prepare payload payload = {"name": name, "status": "draft", "architecture": pipeline} url = urljoin(cls.backend_url, "sdk/pipelines") - headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + api_key = api_key if api_key is not None else config.TEAM_API_KEY + headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"} logging.info(f"Start service for POST Create Pipeline - {url} - {headers} - {json.dumps(payload)}") r = _request_with_retry("post", url, headers=headers, json=payload) response = r.json() - return Pipeline(response["id"], name, config.TEAM_API_KEY) + return Pipeline(response["id"], name, api_key) except Exception as e: raise Exception(e) From 04246b152898640adaa975bef7969a8eab5853d6 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Mon, 17 Jun 2024 13:15:50 -0300 Subject: [PATCH 009/105] M 6769474660 save pipelines (#191) * Saving pipelines as asset * Pipeline delete service * Function type AI to lower case --------- Co-authored-by: Thiago Castro Ferreira --- aixplain/factories/pipeline_factory.py | 15 ++++----- aixplain/modules/pipeline.py | 39 +++++++++++++++++------ tests/functional/pipelines/create_test.py | 7 ++-- tests/unit/pipeline_test.py | 3 +- 4 files changed, 43 insertions(+), 21 deletions(-) diff --git a/aixplain/factories/pipeline_factory.py b/aixplain/factories/pipeline_factory.py index 4ebdc439..fac42283 100644 --- a/aixplain/factories/pipeline_factory.py +++ b/aixplain/factories/pipeline_factory.py @@ -225,16 +225,13 @@ def list( return {"results": pipelines, "page_total": page_total, "page_number": page_number, "total": total} @classmethod - def create( - cls, name: Text, pipeline: Union[Text, Dict], status: Text = "draft", api_key: Optional[Text] = None - ) -> Pipeline: - """Pipeline Creation + def create(cls, name: Text, pipeline: Union[Text, Dict], api_key: Optional[Text] = None) -> Pipeline: + """Draft Pipeline Creation Args: name (Text): Pipeline Name pipeline (Union[Text, Dict]): Pipeline as a Python dictionary or in a JSON file - status (Text, optional): Status of the pipeline. Currently only draft pipelines can be saved. Defaults to "draft". - api_key (Optional[Text], optional): _description_. Defaults to None. + api_key (Optional[Text], optional): Team API Key to create the Pipeline. Defaults to None. Raises: Exception: Currently just the creation of draft pipelines are supported @@ -243,15 +240,17 @@ def create( Pipeline: instance of the new pipeline """ try: - assert status == "draft", "Pipeline Creation Error: Currently just the creation of draft pipelines are supported." if isinstance(pipeline, str) is True: _, ext = os.path.splitext(pipeline) assert ( os.path.exists(pipeline) and ext == ".json" - ), "Pipeline Creation Error: Make sure the pipeline to be save is in a JSON file." + ), "Pipeline Creation Error: Make sure the pipeline to be saved is in a JSON file." 
with open(pipeline) as f: pipeline = json.load(f) + for i, node in enumerate(pipeline["nodes"]): + if "functionType" in node and node["functionType"] == "AI": + pipeline["nodes"][i]["functionType"] = pipeline["nodes"][i]["functionType"].lower() # prepare payload payload = {"name": name, "status": "draft", "architecture": pipeline} url = urljoin(cls.backend_url, "sdk/pipelines") diff --git a/aixplain/modules/pipeline.py b/aixplain/modules/pipeline.py index 3de49756..b079e2a3 100644 --- a/aixplain/modules/pipeline.py +++ b/aixplain/modules/pipeline.py @@ -101,12 +101,12 @@ def __polling( time.sleep(wait_time) if wait_time < 60: wait_time *= 1.1 - except Exception as e: + except Exception: logging.error(f"Polling for Pipeline: polling for {name} : Continue") if response_body and response_body["status"] == "SUCCESS": try: logging.debug(f"Polling for Pipeline: Final status of polling for {name} : SUCCESS - {response_body}") - except Exception as e: + except Exception: logging.error(f"Polling for Pipeline: Final status of polling for {name} : ERROR - {response_body}") else: logging.error( @@ -130,7 +130,7 @@ def poll(self, poll_url: Text, name: Text = "pipeline_process") -> Dict: try: resp = r.json() logging.info(f"Single Poll for Pipeline: Status of polling for {name} : {resp}") - except Exception as e: + except Exception: resp = {"status": "FAILED"} return resp @@ -206,7 +206,7 @@ def __prepare_payload(self, data: Union[Text, Dict], data_asset: Optional[Union[ if isinstance(payload, int) is True or isinstance(payload, float) is True: payload = str(payload) payload = {"data": payload} - except Exception as e: + except Exception: payload = {"data": data} else: payload = {} @@ -251,7 +251,7 @@ def __prepare_payload(self, data: Union[Text, Dict], data_asset: Optional[Union[ if target_row.id == data[node_label]: data_found = True break - if data_found == True: + if data_found is True: break except Exception: data_asset_found = False @@ -303,17 +303,18 @@ def run_async( poll_url = resp["url"] response = {"status": "IN_PROGRESS", "url": poll_url} - except Exception as e: + except Exception: response = {"status": "FAILED"} if resp is not None: response["error"] = resp return response - def update(self, pipeline: Union[Text, Dict]): + def update(self, pipeline: Union[Text, Dict], save_as_asset: bool = False): """Update Pipeline Args: pipeline (Union[Text, Dict]): Pipeline as a Python dictionary or in a JSON file + save_as_asset (bool, optional): Save as asset (True) or draft (False). Defaults to False. Raises: Exception: Make sure the pipeline to be save is in a JSON file. @@ -323,12 +324,18 @@ def update(self, pipeline: Union[Text, Dict]): _, ext = os.path.splitext(pipeline) assert ( os.path.exists(pipeline) and ext == ".json" - ), "Pipeline Update Error: Make sure the pipeline to be save is in a JSON file." + ), "Pipeline Update Error: Make sure the pipeline to be saved is in a JSON file." 
with open(pipeline) as f:
 pipeline = json.load(f)
+ for i, node in enumerate(pipeline["nodes"]):
+ if "functionType" in node and node["functionType"] == "AI":
+ pipeline["nodes"][i]["functionType"] = pipeline["nodes"][i]["functionType"].lower()
 # prepare payload
- payload = {"name": self.name, "status": "draft", "architecture": pipeline}
+ status = "draft"
+ if save_as_asset is True:
+ status = "onboarded"
+ payload = {"name": self.name, "status": status, "architecture": pipeline}
 url = urljoin(config.BACKEND_URL, f"sdk/pipelines/{self.id}")
 headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"}
 logging.info(f"Start service for PUT Update Pipeline - {url} - {headers} - {json.dumps(payload)}")
@@ -337,3 +344,17 @@ def update(self, pipeline: Union[Text, Dict], save_as_asset: bool = False):
 logging.info(f"Pipeline {response['id']} Updated.")
 except Exception as e:
 raise Exception(e)
+
+ def delete(self) -> None:
+ """Delete Pipeline service"""
+ try:
+ url = urljoin(config.BACKEND_URL, f"sdk/pipelines/{self.id}")
+ headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"}
+ logging.info(f"Start service for DELETE Pipeline - {url} - {headers}")
+ r = _request_with_retry("delete", url, headers=headers)
+ if r.status_code != 200:
+ raise Exception()
+ except Exception:
+ message = "Pipeline Deletion Error: Make sure the pipeline exists and you are the owner."
+ logging.error(message)
+ raise Exception(f"{message}")
diff --git a/tests/functional/pipelines/create_test.py b/tests/functional/pipelines/create_test.py
index f2c1a9c9..6431bd41 100644
--- a/tests/functional/pipelines/create_test.py
+++ b/tests/functional/pipelines/create_test.py
@@ -30,6 +30,7 @@ def test_create_pipeline_from_json():
 assert isinstance(pipeline, Pipeline)
 assert pipeline.id != ""
+ pipeline.delete()
 def test_create_pipeline_from_string():
@@ -42,6 +43,7 @@ def test_create_pipeline_from_string():
 assert isinstance(pipeline, Pipeline)
 assert pipeline.id != ""
+ pipeline.delete()
 def test_update_pipeline():
@@ -52,13 +54,14 @@ def test_update_pipeline():
 pipeline_name = str(uuid4())
 pipeline = PipelineFactory.create(name=pipeline_name, pipeline=pipeline_dict)
- pipeline.update(pipeline=pipeline_json)
+ pipeline.update(pipeline=pipeline_json, save_as_asset=True)
 assert isinstance(pipeline, Pipeline)
 assert pipeline.id != ""
+ pipeline.delete()
 def test_create_pipeline_wrong_path():
 pipeline_name = str(uuid4())
 with pytest.raises(Exception):
- pipeline = PipelineFactory.create(name=pipeline_name, pipeline="/")
+ PipelineFactory.create(name=pipeline_name, pipeline="/")
diff --git a/tests/unit/pipeline_test.py b/tests/unit/pipeline_test.py
index 68a399aa..e983a298 100644
--- a/tests/unit/pipeline_test.py
+++ b/tests/unit/pipeline_test.py
@@ -24,7 +24,6 @@
 from aixplain.factories import PipelineFactory
 from aixplain.modules import Pipeline
 from urllib.parse import urljoin
-import pytest
 def test_create_pipeline():
@@ -34,6 +33,6 @@ def test_create_pipeline():
 ref_response = {"id": "12345"}
 mock.post(url, headers=headers, json=ref_response)
 ref_pipeline = Pipeline(id="12345", name="Pipeline Test", api_key=config.TEAM_API_KEY)
- hyp_pipeline = PipelineFactory.create(pipeline={}, name="Pipeline Test")
+ hyp_pipeline = PipelineFactory.create(pipeline={"nodes": []}, name="Pipeline Test")
 assert hyp_pipeline.id == ref_pipeline.id
 assert hyp_pipeline.name == ref_pipeline.name
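With the services added in this patch, the full draft-to-asset round trip looks roughly like this (a sketch: `{"nodes": []}` mirrors the minimal payload used in tests/unit/pipeline_test.py, and `TEAM_API_KEY` is assumed to be set):

```python
from uuid import uuid4

from aixplain.factories import PipelineFactory

pipeline = PipelineFactory.create(name=str(uuid4()), pipeline={"nodes": []})  # saved as a draft
pipeline.update(pipeline={"nodes": []}, save_as_asset=True)  # promote the draft to an asset
pipeline.delete()  # owner-only; raises if the pipeline does not exist
```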
From 73021a79c4670d1cfdd1436199af5748285fc6a1 Mon Sep 17 00:00:00 2001
From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com>
Date: Tue, 18 Jun 2024 11:40:44 -0300
Subject: [PATCH 010/105] M 6769474660 save pipelines (#192)

* Saving pipelines as asset
* Pipeline delete service
* Function type AI to lower case
* API Key as a parameter
---------
Co-authored-by: Thiago Castro Ferreira
---
 aixplain/modules/pipeline.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/aixplain/modules/pipeline.py b/aixplain/modules/pipeline.py
index b079e2a3..ed131018 100644
--- a/aixplain/modules/pipeline.py
+++ b/aixplain/modules/pipeline.py
@@ -309,12 +309,13 @@ def run_async(
 response["error"] = resp
 return response
- def update(self, pipeline: Union[Text, Dict], save_as_asset: bool = False):
+ def update(self, pipeline: Union[Text, Dict], save_as_asset: bool = False, api_key: Optional[Text] = None):
 """Update Pipeline
 Args:
 pipeline (Union[Text, Dict]): Pipeline as a Python dictionary or in a JSON file
 save_as_asset (bool, optional): Save as asset (True) or draft (False). Defaults to False.
+ api_key (Optional[Text], optional): Team API Key to update the Pipeline. Defaults to None.
 Raises:
 Exception: Make sure the pipeline to be save is in a JSON file.
@@ -337,7 +338,8 @@ def update(self, pipeline: Union[Text, Dict], save_as_asset: bool = False):
 status = "onboarded"
 payload = {"name": self.name, "status": status, "architecture": pipeline}
 url = urljoin(config.BACKEND_URL, f"sdk/pipelines/{self.id}")
- headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"}
+ api_key = api_key if api_key is not None else config.TEAM_API_KEY
+ headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"}
 logging.info(f"Start service for PUT Update Pipeline - {url} - {headers} - {json.dumps(payload)}")
 r = _request_with_retry("put", url, headers=headers, json=payload)
 response = r.json()

From 474602b7b94126b442c83441e2545b7925c79702 Mon Sep 17 00:00:00 2001
From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com>
Date: Wed, 26 Jun 2024 08:50:04 -0300
Subject: [PATCH 011/105] Solving bug when LLM parameters are set on data (#196)

Co-authored-by: Thiago Castro Ferreira
---
 aixplain/modules/model/llm_model.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/aixplain/modules/model/llm_model.py b/aixplain/modules/model/llm_model.py
index 349ea595..14b9c7f4 100644
--- a/aixplain/modules/model/llm_model.py
+++ b/aixplain/modules/model/llm_model.py
@@ -196,12 +196,12 @@ def run_async(
 payload = {"data": data}
 parameters.update(
 {
- "context": context,
- "prompt": prompt,
- "history": history,
- "temperature": temperature,
- "max_tokens": max_tokens,
- "top_p": top_p,
+ "context": payload["context"] if "context" in payload else context,
+ "prompt": payload["prompt"] if "prompt" in payload else prompt,
+ "history": payload["history"] if "history" in payload else history,
+ "temperature": payload["temperature"] if "temperature" in payload else temperature,
+ "max_tokens": payload["max_tokens"] if "max_tokens" in payload else max_tokens,
+ "top_p": payload["top_p"] if "top_p" in payload else top_p,
 }
 )
 payload.update(parameters)

From 3695686a5e8c34ebdc10301acad1a46f957905d6 Mon Sep 17 00:00:00 2001
From: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com>
Date: Wed, 3 Jul 2024 05:52:17 -0300
Subject: [PATCH 012/105] Fix pipeline functional test (#200)

---
 tests/functional/pipelines/run_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git
a/tests/functional/pipelines/run_test.py b/tests/functional/pipelines/run_test.py index e8bc4d9c..25fadaf4 100644 --- a/tests/functional/pipelines/run_test.py +++ b/tests/functional/pipelines/run_test.py @@ -224,7 +224,7 @@ def test_run_router(input_data: str, output_data: str, version: str): @pytest.mark.parametrize( - "input_data,output_data", + "input_data,output_data,version", [ ("I love it.", "PositiveOutput", "2.0"), ("I hate it.", "NegativeOutput", "2.0"), From 90140617466393d7a7baf75e350ab4f098bbfe47 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Sat, 13 Jul 2024 23:21:52 +0200 Subject: [PATCH 013/105] M 6656407247 agentification (#197) * Agent CRUD * Fixes in the structure * Delete agent method * Add input/output to PipelineFactory and use api_key from parameter (#182) * Enabling pipeline tools * M 6875703542 agentification deployment (#195) * First changes for agent integration with backend * Official creation and deletion services * Running agent method --------- Co-authored-by: Thiago Castro Ferreira * Fix bug when supplier and tools are not given * Add agents functional tests (#204) --------- Co-authored-by: Thiago Castro Ferreira Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira Co-authored-by: Lucas Pavanelli --- aixplain/factories/__init__.py | 1 + aixplain/factories/agent_factory/__init__.py | 166 +++++++++++++++ aixplain/factories/agent_factory/utils.py | 48 +++++ aixplain/factories/pipeline_factory.py | 9 +- aixplain/modules/__init__.py | 2 + aixplain/modules/agent/__init__.py | 195 ++++++++++++++++++ aixplain/modules/agent/tool.py | 59 ++++++ aixplain/modules/agent/tool/__init__.py | 53 +++++ aixplain/modules/agent/tool/model_tool.py | 60 ++++++ aixplain/modules/agent/tool/pipeline_tool.py | 52 +++++ aixplain/modules/asset.py | 8 +- aixplain/modules/finetune/__init__.py | 5 +- aixplain/modules/metric.py | 3 - aixplain/modules/model/__init__.py | 17 +- pyproject.toml | 2 +- .../functional/agent/agent_functional_test.py | 75 +++++++ .../agent/data/agent_test_end2end.json | 14 ++ 17 files changed, 752 insertions(+), 17 deletions(-) create mode 100644 aixplain/factories/agent_factory/__init__.py create mode 100644 aixplain/factories/agent_factory/utils.py create mode 100644 aixplain/modules/agent/__init__.py create mode 100644 aixplain/modules/agent/tool.py create mode 100644 aixplain/modules/agent/tool/__init__.py create mode 100644 aixplain/modules/agent/tool/model_tool.py create mode 100644 aixplain/modules/agent/tool/pipeline_tool.py create mode 100644 tests/functional/agent/agent_functional_test.py create mode 100644 tests/functional/agent/data/agent_test_end2end.json diff --git a/aixplain/factories/__init__.py b/aixplain/factories/__init__.py index 36147c6e..7b876899 100644 --- a/aixplain/factories/__init__.py +++ b/aixplain/factories/__init__.py @@ -20,6 +20,7 @@ limitations under the License. 
""" from .asset_factory import AssetFactory +from .agent_factory import AgentFactory from .benchmark_factory import BenchmarkFactory from .corpus_factory import CorpusFactory from .data_factory import DataFactory diff --git a/aixplain/factories/agent_factory/__init__.py b/aixplain/factories/agent_factory/__init__.py new file mode 100644 index 00000000..36380a76 --- /dev/null +++ b/aixplain/factories/agent_factory/__init__.py @@ -0,0 +1,166 @@ +__author__ = "lucaspavanelli" + +""" +Copyright 2024 The aiXplain SDK authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Author: Thiago Castro Ferreira and Lucas Pavanelli +Date: May 16th 2024 +Description: + Agent Factory Class +""" + +import json +import logging + +from aixplain.enums.supplier import Supplier +from aixplain.modules.agent import Agent, Tool +from aixplain.modules.agent.tool.model_tool import ModelTool +from aixplain.modules.agent.tool.pipeline_tool import PipelineTool +from aixplain.utils import config +from typing import Dict, List, Optional, Text, Union + +from aixplain.factories.agent_factory.utils import build_agent +from aixplain.utils.file_utils import _request_with_retry +from urllib.parse import urljoin + + +class AgentFactory: + @classmethod + def create( + cls, + name: Text, + llm_id: Text, + tools: List[Tool] = [], + description: Text = "", + api_key: Text = config.TEAM_API_KEY, + supplier: Union[Dict, Text, Supplier, int] = "aiXplain", + version: Optional[Text] = None, + ) -> Agent: + """Create a new agent in the platform.""" + try: + agent = None + url = urljoin(config.BACKEND_URL, "sdk/agents") + headers = {"x-api-key": api_key} + + if isinstance(supplier, dict): + supplier = supplier["code"] + elif isinstance(supplier, Supplier): + supplier = supplier.value["code"] + + tool_payload = [] + for tool in tools: + if isinstance(tool, ModelTool): + tool_payload.append( + { + "function": tool.function.value, + "type": "model", + "description": tool.description, + "supplier": tool.supplier.value["code"] if tool.supplier else None, + "version": tool.version if tool.version else None, + } + ) + elif isinstance(tool, PipelineTool): + tool_payload.append( + { + "assetId": tool.pipeline, + "description": tool.description, + "type": "pipeline", + } + ) + else: + raise Exception("Agent Creation Error: Tool type not supported.") + + payload = { + "name": name, + "assets": tool_payload, + "description": description, + "supplier": supplier, + "version": version, + } + if llm_id is not None: + payload["llmId"] = llm_id + + logging.info(f"Start service for POST Create Agent - {url} - {headers} - {json.dumps(payload)}") + r = _request_with_retry("post", url, headers=headers, json=payload) + if 200 <= r.status_code < 300: + response = r.json() + agent = build_agent(payload=response, api_key=api_key) + else: + error = r.json() + error_msg = "Agent Onboarding Error: Please contant the administrators." + if "message" in error: + msg = error["message"] + if error["message"] == "err.name_already_exists": + msg = "Agent name already exists." 
+ elif error["message"] == "err.asset_is_not_available": + msg = "Some the tools are not available." + error_msg = f"Agent Onboarding Error (HTTP {r.status_code}): {msg}" + logging.exception(error_msg) + raise Exception(error_msg) + except Exception as e: + raise Exception(e) + return agent + + @classmethod + def list(cls) -> Dict: + """List all agents available in the platform.""" + url = urljoin(config.BACKEND_URL, "sdk/agents") + headers = {"x-api-key": config.TEAM_API_KEY, "Content-Type": "application/json"} + + payload = {} + logging.info(f"Start service for GET List Agents - {url} - {headers} - {json.dumps(payload)}") + try: + r = _request_with_retry("get", url, headers=headers) + resp = r.json() + + if 200 <= r.status_code < 300: + agents, page_total, total = [], 0, 0 + results = resp + page_total = len(results) + total = len(results) + logging.info(f"Response for GET List Agents - Page Total: {page_total} / Total: {total}") + for agent in results: + agents.append(build_agent(agent)) + return {"results": agents, "page_total": page_total, "page_number": 0, "total": total} + else: + error_msg = "Agent Listing Error: Please contant the administrators." + if "message" in resp: + msg = resp["message"] + error_msg = f"Agent Listing Error (HTTP {r.status_code}): {msg}" + logging.exception(error_msg) + raise Exception(error_msg) + except Exception as e: + raise Exception(e) + + @classmethod + def get(cls, agent_id: Text, api_key: Optional[Text] = None) -> Agent: + """Get agent by id.""" + url = urljoin(config.BACKEND_URL, f"sdk/agents/{agent_id}") + if config.AIXPLAIN_API_KEY != "": + headers = {"x-aixplain-key": f"{config.AIXPLAIN_API_KEY}", "Content-Type": "application/json"} + else: + api_key = api_key if api_key is not None else config.TEAM_API_KEY + headers = {"x-api-key": api_key, "Content-Type": "application/json"} + logging.info(f"Start service for GET Agent - {url} - {headers}") + r = _request_with_retry("get", url, headers=headers) + resp = r.json() + if 200 <= r.status_code < 300: + return build_agent(resp) + else: + msg = "Please contant the administrators." 
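Taken together, the factory supports a create/run/get flow along the lines of the sketch below. It is illustrative only: the agent name, query, and tool function are placeholders, tool construction follows the kwargs used in utils.py, and `Function.TRANSLATION` assumes such a member exists in the dynamically built enum.

```python
from aixplain.enums import Function
from aixplain.factories import AgentFactory
from aixplain.modules.agent.tool.model_tool import ModelTool

# Create an agent backed by the default GPT-4o LLM ID used in this patch.
agent = AgentFactory.create(
    name="translation-agent",
    llm_id="6646261c6eb563165658bbb1",
    tools=[ModelTool(function=Function.TRANSLATION)],
    description="Translates user queries.",
)

# Run a query and fetch the same agent back by ID.
response = agent.run(query="Translate 'hello' to Portuguese.")
same_agent = AgentFactory.get(agent.id)
```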
+ if "message" in resp: + msg = resp["message"] + error_msg = f"Agent Get Error (HTTP {r.status_code}): {msg}" + raise Exception(error_msg) diff --git a/aixplain/factories/agent_factory/utils.py b/aixplain/factories/agent_factory/utils.py new file mode 100644 index 00000000..6363a08e --- /dev/null +++ b/aixplain/factories/agent_factory/utils.py @@ -0,0 +1,48 @@ +__author__ = "thiagocastroferreira" + +import aixplain.utils.config as config +from aixplain.enums import Function, Supplier +from aixplain.enums.asset_status import AssetStatus +from aixplain.modules.agent import Agent, ModelTool, PipelineTool +from typing import Dict, Text +from urllib.parse import urljoin + + +def build_agent(payload: Dict, api_key: Text = config.TEAM_API_KEY) -> Agent: + """Instantiate a new agent in the platform.""" + tools = payload["assets"] + for i, tool in enumerate(tools): + if tool["type"] == "model": + for supplier in Supplier: + if tool["supplier"] is not None and tool["supplier"].lower() in [ + supplier.value["code"].lower(), + supplier.value["name"].lower(), + ]: + tool["supplier"] = supplier + break + + tool = ModelTool( + function=Function(tool["function"]), + supplier=tool["supplier"], + version=tool["version"], + ) + elif tool["type"] == "pipeline": + tool = PipelineTool(description=tool["description"], pipeline=tool["assetId"]) + else: + raise Exception("Agent Creation Error: Tool type not supported.") + tools[i] = tool + + agent = Agent( + id=payload["id"], + name=payload["name"] if "name" in payload else "", + tools=tools, + description=payload["description"] if "description" in payload else "", + supplier=payload["teamId"] if "teamId" in payload else None, + version=payload["version"] if "version" in payload else None, + cost=payload["cost"] if "cost" in payload else None, + llm_id=payload["llmId"] if "llmId" in payload else "6646261c6eb563165658bbb1", + api_key=api_key, + status=AssetStatus(payload["status"]), + ) + agent.url = urljoin(config.BACKEND_URL, f"sdk/agents/{agent.id}/run") + return agent diff --git a/aixplain/factories/pipeline_factory.py b/aixplain/factories/pipeline_factory.py index fac42283..61bcb214 100644 --- a/aixplain/factories/pipeline_factory.py +++ b/aixplain/factories/pipeline_factory.py @@ -45,6 +45,11 @@ class PipelineFactory: aixplain_key = config.AIXPLAIN_API_KEY backend_url = config.BACKEND_URL + @classmethod + def __get_typed_nodes(cls, response: Dict, type: str) -> List[Dict]: + # read "nodes" field from response and return the nodes that are marked by "type": type + return [node for node in response["nodes"] if node["type"].lower() == type.lower()] + @classmethod def __from_response(cls, response: Dict) -> Pipeline: """Converts response Json to 'Pipeline' object @@ -57,7 +62,9 @@ def __from_response(cls, response: Dict) -> Pipeline: """ if "api_key" not in response: response["api_key"] = config.TEAM_API_KEY - return Pipeline(response["id"], response["name"], response["api_key"]) + input = cls.__get_typed_nodes(response, "input") + output = cls.__get_typed_nodes(response, "output") + return Pipeline(response["id"], response["name"], response["api_key"], input=input, output=output) @classmethod def get(cls, pipeline_id: Text, api_key: Optional[Text] = None) -> Pipeline: diff --git a/aixplain/modules/__init__.py b/aixplain/modules/__init__.py index 488c8c2f..c7246dac 100644 --- a/aixplain/modules/__init__.py +++ b/aixplain/modules/__init__.py @@ -33,3 +33,5 @@ from .finetune.status import FinetuneStatus from .benchmark import Benchmark from .benchmark_job import 
BenchmarkJob
+from .agent import Agent
+from .agent.tool import Tool
diff --git a/aixplain/modules/agent/__init__.py b/aixplain/modules/agent/__init__.py
new file mode 100644
index 00000000..2f244d56
--- /dev/null
+++ b/aixplain/modules/agent/__init__.py
@@ -0,0 +1,195 @@
+__author__ = "aiXplain"
+
+"""
+Copyright 2024 The aiXplain SDK authors
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Author: Lucas Pavanelli and Thiago Castro Ferreira
+Date: May 16th 2024
+Description:
+    Agentification Class
+"""
+import json
+import logging
+import time
+import traceback
+
+from aixplain.utils.file_utils import _request_with_retry
+from aixplain.enums.supplier import Supplier
+from aixplain.enums.asset_status import AssetStatus
+from aixplain.modules.model import Model
+from aixplain.modules.agent.tool import Tool
+from aixplain.modules.agent.tool.model_tool import ModelTool
+from aixplain.modules.agent.tool.pipeline_tool import PipelineTool
+from typing import Dict, List, Text, Optional, Union
+from urllib.parse import urljoin
+
+from aixplain.utils import config
+
+
+class Agent(Model):
+    """Advanced AI system capable of performing tasks by leveraging specialized software tools and resources from aiXplain marketplace.
+
+    Attributes:
+        id (Text): ID of the Agent
+        name (Text): Name of the Agent
+        tools (List[Tool]): List of tools that the Agent uses.
+        description (Text, optional): description of the Agent. Defaults to "".
+        llm_id (Text, optional): large language model. Defaults to GPT-4o (6646261c6eb563165658bbb1).
+        supplier (Text): Supplier of the Agent.
+        version (Text): Version of the Agent.
+        backend_url (str): URL of the backend.
+        api_key (str): The TEAM API key used for authentication.
+        cost (Dict, optional): model price. Defaults to None.
+    """
+
+    def __init__(
+        self,
+        id: Text,
+        name: Text,
+        tools: List[Tool] = [],
+        description: Text = "",
+        llm_id: Text = "6646261c6eb563165658bbb1",
+        api_key: Optional[Text] = config.TEAM_API_KEY,
+        supplier: Union[Dict, Text, Supplier, int] = "aiXplain",
+        version: Optional[Text] = None,
+        cost: Optional[Dict] = None,
+        status: AssetStatus = AssetStatus.ONBOARDING,
+        **additional_info,
+    ) -> None:
+        """Create an Agent with the necessary information.
+
+        Args:
+            id (Text): ID of the Agent
+            name (Text): Name of the Agent
+            tools (List[Tool]): List of tools that the Agent uses.
+            description (Text, optional): description of the Agent. Defaults to "".
+            llm_id (Text, optional): large language model. Defaults to GPT-4o (6646261c6eb563165658bbb1).
+            supplier (Text): Supplier of the Agent.
+            version (Text): Version of the Agent.
+            backend_url (str): URL of the backend.
+            api_key (str): The TEAM API key used for authentication.
+            cost (Dict, optional): model price. Defaults to None.
+        """
+        # assert len(tools) > 0, "At least one tool must be provided."
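+        # note: tools may be empty, in which case the agent relies solely on its LLM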
+ super().__init__(id, name, description, api_key, supplier, version, cost=cost) + self.additional_info = additional_info + self.tools = tools + self.llm_id = llm_id + if isinstance(status, str): + try: + status = AssetStatus(status) + except Exception: + status = AssetStatus.ONBOARDING + self.status = status + + def run( + self, + query: Text, + session_id: Optional[Text] = None, + history: Optional[List[Dict]] = None, + name: Text = "model_process", + timeout: float = 300, + parameters: Dict = {}, + wait_time: float = 0.5, + ) -> Dict: + """Runs an agent call. + + Args: + query (Text): query to be processed by the agent. + session_id (Optional[Text], optional): conversation Session ID. Defaults to None. + history (Optional[List[Dict]], optional): chat history (in case session ID is None). Defaults to None. + name (Text, optional): ID given to a call. Defaults to "model_process". + timeout (float, optional): total polling time. Defaults to 300. + parameters (Dict, optional): optional parameters to the model. Defaults to "{}". + wait_time (float, optional): wait time in seconds between polling calls. Defaults to 0.5. + + Returns: + Dict: parsed output from model + """ + start = time.time() + try: + response = self.run_async(query=query, session_id=session_id, history=history, name=name, parameters=parameters) + if response["status"] == "FAILED": + end = time.time() + response["elapsed_time"] = end - start + return response + poll_url = response["url"] + end = time.time() + response = self.sync_poll(poll_url, name=name, timeout=timeout, wait_time=wait_time) + return response + except Exception as e: + msg = f"Error in request for {name} - {traceback.format_exc()}" + logging.error(f"Model Run: Error in running for {name}: {e}") + end = time.time() + return {"status": "FAILED", "error": msg, "elapsed_time": end - start} + + def run_async( + self, + query: Text, + session_id: Optional[Text] = None, + history: Optional[List[Dict]] = None, + name: Text = "model_process", + parameters: Dict = {}, + ) -> Dict: + """Runs asynchronously an agent call. + + Args: + query (Text): query to be processed by the agent. + session_id (Optional[Text], optional): conversation Session ID. Defaults to None. + history (Optional[List[Dict]], optional): chat history (in case session ID is None). Defaults to None. + name (Text, optional): ID given to a call. Defaults to "model_process". + parameters (Dict, optional): optional parameters to the model. Defaults to "{}". 
+
+        Returns:
+            dict: polling URL in response
+        """
+        headers = {"x-api-key": self.api_key, "Content-Type": "application/json"}
+        from aixplain.factories.file_factory import FileFactory
+
+        payload = {"id": self.id, "query": FileFactory.to_link(query), "sessionId": session_id, "history": history}
+        payload.update(parameters)
+        payload = json.dumps(payload)
+
+        r = _request_with_retry("post", self.url, headers=headers, data=payload)
+        logging.info(f"Model Run Async: Start service for {name} - {self.url} - {payload} - {headers}")
+
+        resp = None
+        try:
+            resp = r.json()
+            logging.info(f"Result of request for {name} - {r.status_code} - {resp}")
+
+            poll_url = resp["data"]
+            response = {"status": "IN_PROGRESS", "url": poll_url}
+        except Exception:
+            response = {"status": "FAILED"}
+            msg = f"Error in request for {name} - {traceback.format_exc()}"
+            logging.error(f"Model Run Async: Error in running for {name}: {resp}")
+            if resp is not None:
+                response["error"] = msg
+        return response
+
+    def delete(self) -> None:
+        """Delete Agent service"""
+        try:
+            url = urljoin(config.BACKEND_URL, f"sdk/agents/{self.id}")
+            headers = {"x-api-key": config.TEAM_API_KEY, "Content-Type": "application/json"}
+            logging.debug(f"Start service for DELETE Agent - {url} - {headers}")
+            r = _request_with_retry("delete", url, headers=headers)
+            if r.status_code != 200:
+                raise Exception()
+        except Exception:
+            message = f"Agent Deletion Error (HTTP {r.status_code}): Make sure the agent exists and you are the owner."
+            logging.error(message)
+            raise Exception(f"{message}")
diff --git a/aixplain/modules/agent/tool.py b/aixplain/modules/agent/tool.py
new file mode 100644
index 00000000..6651afe7
--- /dev/null
+++ b/aixplain/modules/agent/tool.py
@@ -0,0 +1,59 @@
+__author__ = "aiXplain"
+
+"""
+Copyright 2024 The aiXplain SDK authors
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Author: Lucas Pavanelli and Thiago Castro Ferreira
+Date: May 16th 2024
+Description:
+    Agentification Class
+"""
+from typing import Text, Optional
+
+from aixplain.enums.function import Function
+from aixplain.enums.supplier import Supplier
+
+
+class Tool:
+    """Specialized software or resource designed to assist the AI in executing specific tasks or functions based on user commands.
+
+    Attributes:
+        name (Text): name of the tool
+        description (Text): description of the tool
+        function (Function): task that the tool performs
+        supplier (Optional[Union[Dict, Text, Supplier, int]], optional): Preferred supplier to perform the task. Defaults to None.
+    """
+
+    def __init__(
+        self,
+        name: Text,
+        description: Text,
+        function: Function,
+        supplier: Optional[Supplier] = None,
+        **additional_info,
+    ) -> None:
+        """Specialized software or resource designed to assist the AI in executing specific tasks or functions based on user commands.
+
+        Args:
+            name (Text): name of the tool
+            description (Text): description of the tool
+            function (Function): task that the tool performs
+            supplier (Optional[Union[Dict, Text, Supplier, int]], optional): Preferred supplier to perform the task. Defaults to None.
+        """
+        self.name = name
+        self.description = description
+        self.function = function
+        self.supplier = supplier
+        self.additional_info = additional_info
diff --git a/aixplain/modules/agent/tool/__init__.py b/aixplain/modules/agent/tool/__init__.py
new file mode 100644
index 00000000..2a22511a
--- /dev/null
+++ b/aixplain/modules/agent/tool/__init__.py
@@ -0,0 +1,53 @@
+__author__ = "aiXplain"
+
+"""
+Copyright 2024 The aiXplain SDK authors
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Author: Lucas Pavanelli and Thiago Castro Ferreira
+Date: May 16th 2024
+Description:
+    Agentification Class
+"""
+from abc import ABC
+from typing import Optional, Text
+
+
+class Tool(ABC):
+    """Specialized software or resource designed to assist the AI in executing specific tasks or functions based on user commands.
+
+    Attributes:
+        name (Text): name of the tool
+        description (Text): description of the tool
+        version (Text): version of the tool
+    """
+
+    def __init__(
+        self,
+        name: Text,
+        description: Text,
+        version: Optional[Text] = None,
+        **additional_info,
+    ) -> None:
+        """Specialized software or resource designed to assist the AI in executing specific tasks or functions based on user commands.
+
+        Args:
+            name (Text): name of the tool
+            description (Text): description of the tool
+            version (Text): version of the tool
+        """
+        self.name = name
+        self.description = description
+        self.version = version
+        self.additional_info = additional_info
diff --git a/aixplain/modules/agent/tool/model_tool.py b/aixplain/modules/agent/tool/model_tool.py
new file mode 100644
index 00000000..69bf28d5
--- /dev/null
+++ b/aixplain/modules/agent/tool/model_tool.py
@@ -0,0 +1,60 @@
+__author__ = "aiXplain"
+
+"""
+Copyright 2024 The aiXplain SDK authors
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Author: Lucas Pavanelli and Thiago Castro Ferreira
+Date: May 16th 2024
+Description:
+    Agentification Class
+"""
+from typing import Optional
+
+from aixplain.enums.function import Function
+from aixplain.enums.supplier import Supplier
+from aixplain.modules.agent.tool import Tool
+
+
+class ModelTool(Tool):
+    """Specialized software or resource designed to assist the AI in executing specific tasks or functions based on user commands.
+
+    Attributes:
+        function (Function): task that the tool performs
+        supplier (Optional[Union[Dict, Text, Supplier, int]], optional): Preferred supplier to perform the task. Defaults to None.
+    """
+
+    def __init__(
+        self,
+        function: Function,
+        supplier: Optional[Supplier] = None,
+        **additional_info,
+    ) -> None:
+        """Specialized software or resource designed to assist the AI in executing specific tasks or functions based on user commands.
+
+        Args:
+            function (Function): task that the tool performs
+            supplier (Optional[Union[Dict, Text, Supplier, int]], optional): Preferred supplier to perform the task. Defaults to None.
+        """
+        super().__init__("", "", **additional_info)
+        if isinstance(function, str):
+            function = Function(function)
+        self.function = function
+
+        try:
+            if isinstance(supplier, dict):
+                supplier = Supplier(supplier)
+        except Exception:
+            supplier = None
+        self.supplier = supplier
diff --git a/aixplain/modules/agent/tool/pipeline_tool.py b/aixplain/modules/agent/tool/pipeline_tool.py
new file mode 100644
index 00000000..a517b198
--- /dev/null
+++ b/aixplain/modules/agent/tool/pipeline_tool.py
@@ -0,0 +1,52 @@
+__author__ = "aiXplain"
+
+"""
+Copyright 2024 The aiXplain SDK authors
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Author: Lucas Pavanelli and Thiago Castro Ferreira
+Date: May 16th 2024
+Description:
+    Agentification Class
+"""
+from typing import Text, Union
+
+from aixplain.modules.agent.tool import Tool
+from aixplain.modules.pipeline import Pipeline
+
+
+class PipelineTool(Tool):
+    """Specialized software or resource designed to assist the AI in executing specific tasks or functions based on user commands.
+
+    Attributes:
+        description (Text): description of the tool
+        pipeline (Union[Text, Pipeline]): pipeline
+    """
+
+    def __init__(
+        self,
+        description: Text,
+        pipeline: Union[Text, Pipeline],
+        **additional_info,
+    ) -> None:
+        """Specialized software or resource designed to assist the AI in executing specific tasks or functions based on user commands.
+ + Args: + description (Text): description of the tool + pipeline (Union[Text, Pipeline]): pipeline + """ + super().__init__("", description, **additional_info) + if isinstance(pipeline, Pipeline): + pipeline = pipeline.id + self.pipeline = pipeline diff --git a/aixplain/modules/asset.py b/aixplain/modules/asset.py index 52b79912..c453415d 100644 --- a/aixplain/modules/asset.py +++ b/aixplain/modules/asset.py @@ -57,7 +57,13 @@ def __init__( elif isinstance(supplier, Dict) is True: self.supplier = Supplier(supplier) else: - self.supplier = supplier + self.supplier = None + for supplier_ in Supplier: + if supplier.lower() in [supplier_.value["code"].lower(), supplier_.value["name"].lower()]: + self.supplier = supplier_ + break + if self.supplier is None: + self.supplier = supplier except Exception: self.supplier = str(supplier) self.version = version diff --git a/aixplain/modules/finetune/__init__.py b/aixplain/modules/finetune/__init__.py index e1b63941..fe2cb15c 100644 --- a/aixplain/modules/finetune/__init__.py +++ b/aixplain/modules/finetune/__init__.py @@ -26,7 +26,6 @@ from urllib.parse import urljoin from aixplain.modules.finetune.cost import FinetuneCost from aixplain.modules.finetune.hyperparameters import Hyperparameters -from aixplain.factories.model_factory import ModelFactory from aixplain.modules.asset import Asset from aixplain.modules.dataset import Dataset from aixplain.modules.model import Model @@ -110,7 +109,7 @@ def start(self) -> Model: """ payload = {} try: - url = urljoin(self.backend_url, f"sdk/finetune") + url = urljoin(self.backend_url, "sdk/finetune") headers = {"Authorization": f"Token {self.api_key}", "Content-Type": "application/json"} payload = { "name": self.name, @@ -134,6 +133,8 @@ def start(self) -> Model: r = _request_with_retry("post", url, headers=headers, json=payload) resp = r.json() logging.info(f"Response for POST Start FineTune - Name: {self.name} / Status {resp}") + from aixplain.factories.model_factory import ModelFactory + return ModelFactory().get(resp["id"]) except Exception: message = "" diff --git a/aixplain/modules/metric.py b/aixplain/modules/metric.py index d591772b..86c08a08 100644 --- a/aixplain/modules/metric.py +++ b/aixplain/modules/metric.py @@ -24,9 +24,6 @@ from typing import Optional, Text, List, Union from aixplain.modules.asset import Asset -from aixplain.utils.file_utils import _request_with_retry -from aixplain.factories.model_factory import ModelFactory - class Metric(Asset): """Represents a metric to be computed on one or more peices of data. It is usually linked to a machine learning task. 
diff --git a/aixplain/modules/model/__init__.py b/aixplain/modules/model/__init__.py index 285cbe55..4be40225 100644 --- a/aixplain/modules/model/__init__.py +++ b/aixplain/modules/model/__init__.py @@ -24,9 +24,7 @@ import json import logging import traceback -from typing import List -from aixplain.factories.file_factory import FileFactory -from aixplain.enums import Function, Supplier +from aixplain.enums import Supplier, Function from aixplain.modules.asset import Asset from aixplain.utils import config from urllib.parse import urljoin @@ -57,7 +55,7 @@ def __init__( id: Text, name: Text, description: Text = "", - api_key: Optional[Text] = None, + api_key: Text = config.TEAM_API_KEY, supplier: Union[Dict, Text, Supplier, int] = "aiXplain", version: Optional[Text] = None, function: Optional[Function] = None, @@ -163,7 +161,7 @@ def poll(self, poll_url: Text, name: Text = "model_process") -> Dict: resp["status"] = "FAILED" else: resp["status"] = "IN_PROGRESS" - logging.info(f"Single Poll for Model: Status of polling for {name}: {resp}") + logging.debug(f"Single Poll for Model: Status of polling for {name}: {resp}") except Exception as e: resp = {"status": "FAILED"} logging.error(f"Single Poll for Model: Error of polling for {name}: {e}") @@ -218,6 +216,7 @@ def run_async(self, data: Union[Text, Dict], name: Text = "model_process", param dict: polling URL in response """ headers = {"x-api-key": self.api_key, "Content-Type": "application/json"} + from aixplain.factories.file_factory import FileFactory data = FileFactory.to_link(data) if isinstance(data, dict): @@ -229,7 +228,7 @@ def run_async(self, data: Union[Text, Dict], name: Text = "model_process", param if isinstance(payload, int) is True or isinstance(payload, float) is True: payload = str(payload) payload = {"data": payload} - except Exception as e: + except Exception: payload = {"data": data} payload.update(parameters) payload = json.dumps(payload) @@ -245,7 +244,7 @@ def run_async(self, data: Union[Text, Dict], name: Text = "model_process", param poll_url = resp["data"] response = {"status": "IN_PROGRESS", "url": poll_url} - except Exception as e: + except Exception: response = {"status": "FAILED"} msg = f"Error in request for {name} - {traceback.format_exc()}" logging.error(f"Model Run Async: Error in running for {name}: {resp}") @@ -267,7 +266,7 @@ def check_finetune_status(self, after_epoch: Optional[int] = None): """ from aixplain.enums.asset_status import AssetStatus from aixplain.modules.finetune.status import FinetuneStatus - + headers = {"x-api-key": self.api_key, "Content-Type": "application/json"} resp = None try: @@ -311,7 +310,7 @@ def check_finetune_status(self, after_epoch: Optional[int] = None): logging.info(f"Response for GET Check FineTune status Model - Id {self.id} / Status {status.status.value}.") return status - except Exception as e: + except Exception: message = "" if resp is not None and "statusCode" in resp: status_code = resp["statusCode"] diff --git a/pyproject.toml b/pyproject.toml index 112c8f9a..73980717 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ namespaces = true [project] name = "aiXplain" -version = "0.2.12" +version = "0.2.13rc2" description = "aiXplain SDK adds AI functions to software." 
readme = "README.md" requires-python = ">=3.5, <4" diff --git a/tests/functional/agent/agent_functional_test.py b/tests/functional/agent/agent_functional_test.py new file mode 100644 index 00000000..f58dcb63 --- /dev/null +++ b/tests/functional/agent/agent_functional_test.py @@ -0,0 +1,75 @@ +__author__ = "lucaspavanelli" + +""" +Copyright 2022 The aiXplain SDK authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import json +from dotenv import load_dotenv + +load_dotenv() +from aixplain.factories import AgentFactory +from aixplain.modules.agent import ModelTool, PipelineTool +from aixplain.enums.supplier import Supplier + +import pytest + +RUN_FILE = "tests/functional/agent/data/agent_test_end2end.json" + + +def read_data(data_path): + return json.load(open(data_path, "r")) + + +@pytest.fixture(scope="module", params=read_data(RUN_FILE)) +def run_input_map(request): + return request.param + + +def test_end2end(run_input_map): + tools = [] + if "model_tools" in run_input_map: + for tool in run_input_map["model_tools"]: + for supplier in Supplier: + if tool["supplier"] is not None and tool["supplier"].lower() in [ + supplier.value["code"].lower(), + supplier.value["name"].lower(), + ]: + tool["supplier"] = supplier + break + tools.append(ModelTool(function=tool["function"], supplier=tool["supplier"])) + if "pipeline_tools" in run_input_map: + for tool in run_input_map["pipeline_tools"]: + tools.append(PipelineTool(description=tool["description"], pipeline=tool["pipeline_id"])) + print(f"Creating agent with tools: {tools}") + agent = AgentFactory.create(name=run_input_map["agent_name"], llm_id=run_input_map["llm_id"], tools=tools) + print(f"Agent created: {agent.__dict__}") + print("Running agent") + response = agent.run(query=run_input_map["query"]) + print(f"Agent response: {response}") + assert response is not None + assert response["completed"] is True + assert response["status"].lower() == "success" + assert "data" in response + assert response["data"]["session_id"] is not None + assert response["data"]["output"] is not None + print("Deleting agent") + agent.delete() + + +def test_list_agents(): + agents = AgentFactory.list() + assert "results" in agents + agents_result = agents["results"] + assert type(agents_result) is list diff --git a/tests/functional/agent/data/agent_test_end2end.json b/tests/functional/agent/data/agent_test_end2end.json new file mode 100644 index 00000000..147928fe --- /dev/null +++ b/tests/functional/agent/data/agent_test_end2end.json @@ -0,0 +1,14 @@ +[ + { + "agent_name": "[TEST] Translation agent", + "llm_id": "6626a3a8c8f1d089790cf5a2", + "llm_name": "Groq Llama 3 70B", + "query": "Who is the president of Brazil right now? 
Translate to pt",
+        "model_tools": [
+            {
+                "function": "translation",
+                "supplier": "AWS"
+            }
+        ]
+    }
+]

From e9091c228585b0aa7a155b1630d7bac5f0dc8582 Mon Sep 17 00:00:00 2001
From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com>
Date: Tue, 30 Jul 2024 21:34:32 +0200
Subject: [PATCH 014/105] Fixing circular import in the SDK (#211)

Co-authored-by: Thiago Castro Ferreira
---
 aixplain/modules/model/llm_model.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/aixplain/modules/model/llm_model.py b/aixplain/modules/model/llm_model.py
index 14b9c7f4..5c5c4140 100644
--- a/aixplain/modules/model/llm_model.py
+++ b/aixplain/modules/model/llm_model.py
@@ -24,7 +24,6 @@
 import json
 import logging
 import traceback
-from aixplain.factories.file_factory import FileFactory
 from aixplain.enums import Function, Supplier
 from aixplain.modules.model import Model
 from aixplain.utils import config
@@ -182,6 +181,8 @@ def run_async(
         """
         headers = {"x-api-key": self.api_key, "Content-Type": "application/json"}
 
+        from aixplain.factories.file_factory import FileFactory
+
         data = FileFactory.to_link(data)
         if isinstance(data, dict):
             payload = data

From f4378153221ae5f5cc247f1667b1f709b880215e Mon Sep 17 00:00:00 2001
From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com>
Date: Fri, 2 Aug 2024 11:00:19 -0300
Subject: [PATCH 015/105] create model/pipeline tools from AgentFactory (#214)

Co-authored-by: Thiago Castro Ferreira
---
 aixplain/factories/agent_factory/__init__.py  | 23 +++++++++++++++++++
 .../functional/agent/agent_functional_test.py |  5 ++--
 2 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/aixplain/factories/agent_factory/__init__.py b/aixplain/factories/agent_factory/__init__.py
index 36380a76..7507eef4 100644
--- a/aixplain/factories/agent_factory/__init__.py
+++ b/aixplain/factories/agent_factory/__init__.py
@@ -24,10 +24,12 @@
 import json
 import logging
 
+from aixplain.enums.function import Function
 from aixplain.enums.supplier import Supplier
 from aixplain.modules.agent import Agent, Tool
 from aixplain.modules.agent.tool.model_tool import ModelTool
 from aixplain.modules.agent.tool.pipeline_tool import PipelineTool
+from aixplain.modules.pipeline import Pipeline
 from aixplain.utils import config
 from typing import Dict, List, Optional, Text, Union
 
@@ -113,6 +115,27 @@
             raise Exception(e)
         return agent
 
+    @classmethod
+    def create_model_tool(cls, function: Union[Function, Text], supplier: Optional[Union[Supplier, Text]] = None) -> ModelTool:
+        """Create a new model tool."""
+        if isinstance(function, str):
+            function = Function(function)
+
+        if supplier is not None:
+            if isinstance(supplier, str):
+                for supplier_ in Supplier:
+                    if supplier.lower() in [supplier_.value["code"].lower(), supplier_.value["name"].lower()]:
+                        supplier = supplier_
+                        break
+            if isinstance(supplier, str):
+                supplier = None
+        return ModelTool(function=function, supplier=supplier)
+
+    @classmethod
+    def create_pipeline_tool(cls, description: Text, pipeline: Union[Pipeline, Text]) -> PipelineTool:
+        """Create a new pipeline tool."""
+        return PipelineTool(description=description, pipeline=pipeline)
+
     @classmethod
     def list(cls) -> Dict:
         """List all agents available in the platform."""
diff --git a/tests/functional/agent/agent_functional_test.py b/tests/functional/agent/agent_functional_test.py
index f58dcb63..766ba386 100644
--- a/tests/functional/agent/agent_functional_test.py
+++ b/tests/functional/agent/agent_functional_test.py
@@ -20,7 +20,6 
@@ load_dotenv() from aixplain.factories import AgentFactory -from aixplain.modules.agent import ModelTool, PipelineTool from aixplain.enums.supplier import Supplier import pytest @@ -48,10 +47,10 @@ def test_end2end(run_input_map): ]: tool["supplier"] = supplier break - tools.append(ModelTool(function=tool["function"], supplier=tool["supplier"])) + tools.append(AgentFactory.create_model_tool(function=tool["function"], supplier=tool["supplier"])) if "pipeline_tools" in run_input_map: for tool in run_input_map["pipeline_tools"]: - tools.append(PipelineTool(description=tool["description"], pipeline=tool["pipeline_id"])) + tools.append(AgentFactory.create_pipeline_tool(pipeline=tool["pipeline_id"], description=tool["description"])) print(f"Creating agent with tools: {tools}") agent = AgentFactory.create(name=run_input_map["agent_name"], llm_id=run_input_map["llm_id"], tools=tools) print(f"Agent created: {agent.__dict__}") From 03009c6bc474c4cee416423d9b96ffc2f539bdea Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Wed, 7 Aug 2024 11:36:23 -0300 Subject: [PATCH 016/105] Set model ID as a parameter (#216) --- aixplain/factories/agent_factory/__init__.py | 15 +++++++--- aixplain/factories/agent_factory/utils.py | 3 +- aixplain/modules/agent/tool/model_tool.py | 28 +++++++++++++++---- .../functional/agent/agent_functional_test.py | 2 +- .../agent/data/agent_test_end2end.json | 5 ++++ 5 files changed, 41 insertions(+), 12 deletions(-) diff --git a/aixplain/factories/agent_factory/__init__.py b/aixplain/factories/agent_factory/__init__.py index 7507eef4..6076eef6 100644 --- a/aixplain/factories/agent_factory/__init__.py +++ b/aixplain/factories/agent_factory/__init__.py @@ -29,6 +29,7 @@ from aixplain.modules.agent import Agent, Tool from aixplain.modules.agent.tool.model_tool import ModelTool from aixplain.modules.agent.tool.pipeline_tool import PipelineTool +from aixplain.modules.model import Model from aixplain.modules.pipeline import Pipeline from aixplain.utils import config from typing import Dict, List, Optional, Text, Union @@ -66,11 +67,12 @@ def create( if isinstance(tool, ModelTool): tool_payload.append( { - "function": tool.function.value, + "function": tool.function.value if tool.function is not None else None, "type": "model", "description": tool.description, "supplier": tool.supplier.value["code"] if tool.supplier else None, "version": tool.version if tool.version else None, + "assetId": tool.model, } ) elif isinstance(tool, PipelineTool): @@ -116,9 +118,14 @@ def create( return agent @classmethod - def create_model_tool(cls, function: Union[Function, Text], supplier: Optional[Union[Supplier, Text]] = None) -> ModelTool: + def create_model_tool( + cls, + model: Optional[Union[Model, Text]] = None, + function: Optional[Union[Function, Text]] = None, + supplier: Optional[Union[Supplier, Text]] = None, + ) -> ModelTool: """Create a new model tool.""" - if isinstance(function, str): + if function is not None and isinstance(function, str): function = Function(function) if supplier is not None: @@ -129,7 +136,7 @@ def create_model_tool(cls, function: Union[Function, Text], supplier: Optional[U break if isinstance(supplier, str): supplier = None - return ModelTool(function=function, supplier=supplier) + return ModelTool(function=function, supplier=supplier, model=model) @classmethod def create_pipeline_tool(cls, description: Text, pipeline: Union[Pipeline, Text]) -> PipelineTool: diff --git 
a/aixplain/factories/agent_factory/utils.py b/aixplain/factories/agent_factory/utils.py
index 6363a08e..4b314ef7 100644
--- a/aixplain/factories/agent_factory/utils.py
+++ b/aixplain/factories/agent_factory/utils.py
@@ -22,9 +22,10 @@ def build_agent(payload: Dict, api_key: Text = config.TEAM_API_KEY) -> Agent:
                     break
 
             tool = ModelTool(
-                function=Function(tool["function"]),
+                function=Function(tool["function"]) if tool["function"] is not None else None,
                 supplier=tool["supplier"],
                 version=tool["version"],
+                model=tool["assetId"],
             )
         elif tool["type"] == "pipeline":
             tool = PipelineTool(description=tool["description"], pipeline=tool["assetId"])
diff --git a/aixplain/modules/agent/tool/model_tool.py b/aixplain/modules/agent/tool/model_tool.py
index 69bf28d5..a5acab30 100644
--- a/aixplain/modules/agent/tool/model_tool.py
+++ b/aixplain/modules/agent/tool/model_tool.py
@@ -20,11 +20,13 @@
 Description:
     Agentification Class
 """
-from typing import Optional
+from typing import Optional, Union, Text
 
 from aixplain.enums.function import Function
 from aixplain.enums.supplier import Supplier
+from aixplain.factories.model_factory import ModelFactory
 from aixplain.modules.agent.tool import Tool
+from aixplain.modules.model import Model
 
 
 class ModelTool(Tool):
@@ -37,19 +39,25 @@ class ModelTool(Tool):
 
     def __init__(
        
 self,
-        function: Function,
+        function: Optional[Function] = None,
         supplier: Optional[Supplier] = None,
+        model: Optional[Union[Text, Model]] = None,
         **additional_info,
     ) -> None:
         """Specialized software or resource designed to assist the AI in executing specific tasks or functions based on user commands.
 
         Args:
-            function (Function): task that the tool performs
-            supplier (Optional[Union[Dict, Text, Supplier, int]], optional): Preferred supplier to perform the task. Defaults to None.
+            function (Optional[Function], optional): task that the tool performs. Defaults to None.
+            supplier (Optional[Supplier], optional): Preferred supplier to perform the task. Defaults to None.
+            model (Optional[Union[Text, Model]], optional): model (or model ID) to run the task. Defaults to None.
         """
+        assert (
+            function is not None or model is not None
+        ), "Agent Creation Error: Either function or model must be provided when instantiating a tool."
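+        # a tool must reference at least a function or a concrete model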
super().__init__("", "", **additional_info) - if isinstance(function, str): - function = Function(function) + if function is not None: + if isinstance(function, str): + function = Function(function) self.function = function try: @@ -57,4 +65,12 @@ def __init__( supplier = Supplier(supplier) except Exception: supplier = None + + if model is not None: + if isinstance(model, Text) is True: + model = ModelFactory.get(model) + if isinstance(model.supplier, Supplier): + supplier = model.supplier + model = model.id self.supplier = supplier + self.model = model diff --git a/tests/functional/agent/agent_functional_test.py b/tests/functional/agent/agent_functional_test.py index 766ba386..427f62e5 100644 --- a/tests/functional/agent/agent_functional_test.py +++ b/tests/functional/agent/agent_functional_test.py @@ -47,7 +47,7 @@ def test_end2end(run_input_map): ]: tool["supplier"] = supplier break - tools.append(AgentFactory.create_model_tool(function=tool["function"], supplier=tool["supplier"])) + tools.append(AgentFactory.create_model_tool(**tool)) if "pipeline_tools" in run_input_map: for tool in run_input_map["pipeline_tools"]: tools.append(AgentFactory.create_pipeline_tool(pipeline=tool["pipeline_id"], description=tool["description"])) diff --git a/tests/functional/agent/data/agent_test_end2end.json b/tests/functional/agent/data/agent_test_end2end.json index 147928fe..94bfc94b 100644 --- a/tests/functional/agent/data/agent_test_end2end.json +++ b/tests/functional/agent/data/agent_test_end2end.json @@ -8,6 +8,11 @@ { "function": "translation", "supplier": "AWS" + }, + { + "model": "60ddefca8d38c51c58860108", + "function": null, + "supplier": null } ] } From 02f7482466e94422e070acbda901cd426709cf82 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Wed, 7 Aug 2024 19:13:42 -0300 Subject: [PATCH 017/105] Content inputs to be processed according to the query. (#215) * Content inputs to be processed according to the query. * Add data and query parameters on running agent --- aixplain/modules/agent/__init__.py | 53 ++++++++++++++++--- .../functional/agent/agent_functional_test.py | 2 +- 2 files changed, 48 insertions(+), 7 deletions(-) diff --git a/aixplain/modules/agent/__init__.py b/aixplain/modules/agent/__init__.py index 2f244d56..8a5cd120 100644 --- a/aixplain/modules/agent/__init__.py +++ b/aixplain/modules/agent/__init__.py @@ -28,6 +28,7 @@ from aixplain.utils.file_utils import _request_with_retry from aixplain.enums.supplier import Supplier from aixplain.enums.asset_status import AssetStatus +from aixplain.enums.storage_type import StorageType from aixplain.modules.model import Model from aixplain.modules.agent.tool import Tool from aixplain.modules.agent.tool.model_tool import ModelTool @@ -96,31 +97,43 @@ def __init__( def run( self, - query: Text, + data: Optional[Union[Dict, Text]] = None, + query: Optional[Text] = None, session_id: Optional[Text] = None, history: Optional[List[Dict]] = None, name: Text = "model_process", timeout: float = 300, parameters: Dict = {}, wait_time: float = 0.5, + content: List[Text] = [], ) -> Dict: """Runs an agent call. Args: - query (Text): query to be processed by the agent. + data (Optional[Union[Dict, Text]], optional): data to be processed by the agent. Defaults to None. + query (Optional[Text], optional): query to be processed by the agent. Defaults to None. session_id (Optional[Text], optional): conversation Session ID. Defaults to None. 
history (Optional[List[Dict]], optional): chat history (in case session ID is None). Defaults to None. name (Text, optional): ID given to a call. Defaults to "model_process". timeout (float, optional): total polling time. Defaults to 300. parameters (Dict, optional): optional parameters to the model. Defaults to "{}". wait_time (float, optional): wait time in seconds between polling calls. Defaults to 0.5. + content (List[Text], optional): Content inputs to be processed according to the query. Defaults to []. Returns: Dict: parsed output from model """ start = time.time() try: - response = self.run_async(query=query, session_id=session_id, history=history, name=name, parameters=parameters) + response = self.run_async( + data=data, + query=query, + session_id=session_id, + history=history, + name=name, + parameters=parameters, + content=content, + ) if response["status"] == "FAILED": end = time.time() response["elapsed_time"] = end - start @@ -137,27 +150,55 @@ def run( def run_async( self, - query: Text, + data: Optional[Union[Dict, Text]] = None, + query: Optional[Text] = None, session_id: Optional[Text] = None, history: Optional[List[Dict]] = None, name: Text = "model_process", parameters: Dict = {}, + content: List[Text] = [], ) -> Dict: """Runs asynchronously an agent call. Args: - query (Text): query to be processed by the agent. + data (Optional[Union[Dict, Text]], optional): data to be processed by the agent. Defaults to None. + query (Optional[Text], optional): query to be processed by the agent. Defaults to None. session_id (Optional[Text], optional): conversation Session ID. Defaults to None. history (Optional[List[Dict]], optional): chat history (in case session ID is None). Defaults to None. name (Text, optional): ID given to a call. Defaults to "model_process". parameters (Dict, optional): optional parameters to the model. Defaults to "{}". + content (List[Text], optional): Content inputs to be processed according to the query. Defaults to []. Returns: dict: polling URL in response """ - headers = {"x-api-key": self.api_key, "Content-Type": "application/json"} from aixplain.factories.file_factory import FileFactory + assert data is not None or query is not None, "Either 'data' or 'query' must be provided." + if data is not None: + if isinstance(data, dict): + assert "query" in data and data["query"] is not None, "When providing a dictionary, 'query' must be provided." + query = data.get("query") + if session_id is None: + session_id = data.get("session_id") + if history is None: + history = data.get("history") + if len(content) == 0: + content = data.get("content", []) + else: + query = data + + # process content inputs + content = list(set(content)) + if len(content) > 0: + assert FileFactory.check_storage_type(query) == StorageType.TEXT, "When providing 'content', query must be text." + assert len(content) <= 3, "The maximum number of content inputs is 3." 
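+            # resolve each content input to a link (uploading local files) and append it to the query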
+ for input_link in content: + input_link = FileFactory.to_link(input_link) + query += f"\n{input_link}" + + headers = {"x-api-key": self.api_key, "Content-Type": "application/json"} + payload = {"id": self.id, "query": FileFactory.to_link(query), "sessionId": session_id, "history": history} payload.update(parameters) payload = json.dumps(payload) diff --git a/tests/functional/agent/agent_functional_test.py b/tests/functional/agent/agent_functional_test.py index 427f62e5..cefd34c3 100644 --- a/tests/functional/agent/agent_functional_test.py +++ b/tests/functional/agent/agent_functional_test.py @@ -55,7 +55,7 @@ def test_end2end(run_input_map): agent = AgentFactory.create(name=run_input_map["agent_name"], llm_id=run_input_map["llm_id"], tools=tools) print(f"Agent created: {agent.__dict__}") print("Running agent") - response = agent.run(query=run_input_map["query"]) + response = agent.run(data=run_input_map["query"]) print(f"Agent response: {response}") assert response is not None assert response["completed"] is True From 4947959d65c4ddcf94410fbcbb73449bdf0a08a9 Mon Sep 17 00:00:00 2001 From: kadirpekel Date: Sat, 10 Aug 2024 02:12:23 +0300 Subject: [PATCH 018/105] ENG-1: programmatic api introduced (#219) * ENG-1: programmatic api introduced * pipeline.py in .gitignore * jinja dep * fixed data type refs * migrated to designer structure * updated readme * fixed gitignore * enabled node number and label * auto populate params by spec * naming review * designer functional tests * Pipeline getter * unit tests restored * pipeline naming fixed * minor improvements * Fixture on designer functional tests * Fixing input node and link param mapping * Script Nodes and filling missing /o values * Decision getter * README update * minor * moved all designer samples as functional tests --------- Co-authored-by: kadir pekel Co-authored-by: Thiago Castro Ferreira --- .gitignore | 3 + aixplain/enums/data_type.py | 4 +- aixplain/enums/function.py | 1 + aixplain/factories/file_factory.py | 6 +- .../__init__.py} | 117 +- aixplain/factories/pipeline_factory/utils.py | 100 + aixplain/factories/script_factory.py | 31 + aixplain/modules/pipeline/__init__.py | 3 + .../{pipeline.py => pipeline/asset.py} | 195 +- aixplain/modules/pipeline/default.py | 17 + aixplain/modules/pipeline/designer/README.md | 194 + .../modules/pipeline/designer/__init__.py | 66 + aixplain/modules/pipeline/designer/base.py | 375 ++ aixplain/modules/pipeline/designer/enums.py | 43 + aixplain/modules/pipeline/designer/mixins.py | 85 + aixplain/modules/pipeline/designer/nodes.py | 464 ++ .../modules/pipeline/designer/pipeline.py | 328 ++ aixplain/modules/pipeline/generate.py | 227 + aixplain/modules/pipeline/pipeline.py | 4548 +++++++++++++++++ pyproject.toml | 3 +- tests/functional/pipelines/data/script.py | 51 + tests/functional/pipelines/designer_test.py | 248 + tests/unit/designer_test.py | 707 +++ 23 files changed, 7746 insertions(+), 70 deletions(-) rename aixplain/factories/{pipeline_factory.py => pipeline_factory/__init__.py} (77%) create mode 100644 aixplain/factories/pipeline_factory/utils.py create mode 100644 aixplain/factories/script_factory.py create mode 100644 aixplain/modules/pipeline/__init__.py rename aixplain/modules/{pipeline.py => pipeline/asset.py} (68%) create mode 100644 aixplain/modules/pipeline/default.py create mode 100644 aixplain/modules/pipeline/designer/README.md create mode 100644 aixplain/modules/pipeline/designer/__init__.py create mode 100644 aixplain/modules/pipeline/designer/base.py create mode 100644 
aixplain/modules/pipeline/designer/enums.py create mode 100644 aixplain/modules/pipeline/designer/mixins.py create mode 100644 aixplain/modules/pipeline/designer/nodes.py create mode 100644 aixplain/modules/pipeline/designer/pipeline.py create mode 100644 aixplain/modules/pipeline/generate.py create mode 100644 aixplain/modules/pipeline/pipeline.py create mode 100644 tests/functional/pipelines/data/script.py create mode 100644 tests/functional/pipelines/designer_test.py create mode 100644 tests/unit/designer_test.py diff --git a/.gitignore b/.gitignore index b6e47617..843c6556 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,6 @@ dmypy.json # Pyre type checker .pyre/ + +# Vscode +.vscode diff --git a/aixplain/enums/data_type.py b/aixplain/enums/data_type.py index fa79d070..11432bcf 100644 --- a/aixplain/enums/data_type.py +++ b/aixplain/enums/data_type.py @@ -24,7 +24,7 @@ from enum import Enum -class DataType(Enum): +class DataType(str, Enum): AUDIO = "audio" FLOAT = "float" IMAGE = "image" @@ -33,6 +33,8 @@ class DataType(Enum): TENSOR = "tensor" TEXT = "text" VIDEO = "video" + EMBEDDING = "embedding" + NUMBER = "number" def __str__(self): return self._value_ diff --git a/aixplain/enums/function.py b/aixplain/enums/function.py index bfab8427..9a6f47d4 100644 --- a/aixplain/enums/function.py +++ b/aixplain/enums/function.py @@ -55,6 +55,7 @@ def load_functions(): if input_data_object["required"] is True }, "output": {output_data_object["dataType"] for output_data_object in function["output"]}, + "spec": function } for function in resp["items"] } diff --git a/aixplain/factories/file_factory.py b/aixplain/factories/file_factory.py index e7d7c4da..adbebcd3 100644 --- a/aixplain/factories/file_factory.py +++ b/aixplain/factories/file_factory.py @@ -104,7 +104,7 @@ def check_storage_type(cls, input_link: Any) -> StorageType: return StorageType.TEXT @classmethod - def to_link(cls, data: Union[Text, Dict]) -> Union[Text, Dict]: + def to_link(cls, data: Union[Text, Dict], **kwargs) -> Union[Text, Dict]: """If user input data is a local file, upload to aiXplain platform Args: @@ -117,10 +117,10 @@ def to_link(cls, data: Union[Text, Dict]) -> Union[Text, Dict]: for key in data: if isinstance(data[key], str): if cls.check_storage_type(data[key]) == StorageType.FILE: - data[key] = cls.upload(local_path=data[key]) + data[key] = cls.upload(local_path=data[key], **kwargs) elif isinstance(data, str): if cls.check_storage_type(data) == StorageType.FILE: - data = cls.upload(local_path=data) + data = cls.upload(local_path=data, **kwargs) return data @classmethod diff --git a/aixplain/factories/pipeline_factory.py b/aixplain/factories/pipeline_factory/__init__.py similarity index 77% rename from aixplain/factories/pipeline_factory.py rename to aixplain/factories/pipeline_factory/__init__.py index 61bcb214..051c63fb 100644 --- a/aixplain/factories/pipeline_factory.py +++ b/aixplain/factories/pipeline_factory/__init__.py @@ -21,9 +21,10 @@ Pipeline Factory Class """ import json -import logging import os +import logging from typing import Dict, List, Optional, Text, Union +from aixplain.factories.pipeline_factory.utils import build_from_response from aixplain.enums.data_type import DataType from aixplain.enums.function import Function from aixplain.enums.supplier import Supplier @@ -45,27 +46,6 @@ class PipelineFactory: aixplain_key = config.AIXPLAIN_API_KEY backend_url = config.BACKEND_URL - @classmethod - def __get_typed_nodes(cls, response: Dict, type: str) -> List[Dict]: - # read "nodes" field from 
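+            # headers now carry whichever credential was available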
response and return the nodes that are marked by "type": type - return [node for node in response["nodes"] if node["type"].lower() == type.lower()] - - @classmethod - def __from_response(cls, response: Dict) -> Pipeline: - """Converts response Json to 'Pipeline' object - - Args: - response (Dict): Json from API - - Returns: - Pipeline: Coverted 'Pipeline' object - """ - if "api_key" not in response: - response["api_key"] = config.TEAM_API_KEY - input = cls.__get_typed_nodes(response, "input") - output = cls.__get_typed_nodes(response, "output") - return Pipeline(response["id"], response["name"], response["api_key"], input=input, output=output) - @classmethod def get(cls, pipeline_id: Text, api_key: Optional[Text] = None) -> Pipeline: """Create a 'Pipeline' object from pipeline id @@ -81,11 +61,20 @@ def get(cls, pipeline_id: Text, api_key: Optional[Text] = None) -> Pipeline: try: url = urljoin(cls.backend_url, f"sdk/pipelines/{pipeline_id}") if api_key is not None: - headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"} + headers = { + "Authorization": f"Token {api_key}", + "Content-Type": "application/json", + } elif cls.aixplain_key != "": - headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} + headers = { + "x-aixplain-key": f"{cls.aixplain_key}", + "Content-Type": "application/json", + } else: - headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + headers = { + "Authorization": f"Token {config.TEAM_API_KEY}", + "Content-Type": "application/json", + } logging.info(f"Start service for GET Pipeline - {url} - {headers}") r = _request_with_retry("get", url, headers=headers) resp = r.json() @@ -93,16 +82,17 @@ def get(cls, pipeline_id: Text, api_key: Optional[Text] = None) -> Pipeline: resp["api_key"] = config.TEAM_API_KEY if api_key is not None: resp["api_key"] = api_key - pipeline = cls.__from_response(resp) + pipeline = build_from_response(resp, load_architecture=True) return pipeline - except Exception: + except Exception as e: + logging.exception(e) status_code = 400 if resp is not None and "statusCode" in resp: status_code = resp["statusCode"] message = resp["message"] message = f"Pipeline Creation: Status {status_code} - {message}" else: - message = "Pipeline Creation: Unspecified Error" + message = f"Pipeline Creation: Unspecified Error {e}" logging.error(message) raise Exception(f"Status {status_code}: {message}") @@ -127,14 +117,20 @@ def get_assets_from_page(cls, page_number: int) -> List[Pipeline]: try: url = urljoin(cls.backend_url, f"sdk/pipelines/?pageNumber={page_number}") if cls.aixplain_key != "": - headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} + headers = { + "x-aixplain-key": f"{cls.aixplain_key}", + "Content-Type": "application/json", + } else: - headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + headers = { + "Authorization": f"Token {config.TEAM_API_KEY}", + "Content-Type": "application/json", + } r = _request_with_retry("get", url, headers=headers) resp = r.json() logging.info(f"Listing Pipelines: Status of getting Pipelines on Page {page_number}: {resp}") all_pipelines = resp["items"] - pipeline_list = [cls.__from_response(pipeline_info_json) for pipeline_info_json in all_pipelines] + pipeline_list = [build_from_response(pipeline_info_json) for pipeline_info_json in all_pipelines] return pipeline_list except Exception as e: error_message = f"Listing Pipelines: Error in 
getting Pipelines on Page {page_number}: {e}" @@ -177,9 +173,15 @@ def list( url = urljoin(cls.backend_url, "sdk/pipelines/paginate") if cls.aixplain_key != "": - headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} + headers = { + "x-aixplain-key": f"{cls.aixplain_key}", + "Content-Type": "application/json", + } else: - headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + headers = { + "Authorization": f"Token {config.TEAM_API_KEY}", + "Content-Type": "application/json", + } assert 0 < page_size <= 100, "Pipeline List Error: Page size must be greater than 0 and not exceed 100." payload = { @@ -228,11 +230,43 @@ def list( total = resp["total"] logging.info(f"Response for POST List Pipeline - Page Total: {page_total} / Total: {total}") for pipeline in results: - pipelines.append(cls.__from_response(pipeline)) - return {"results": pipelines, "page_total": page_total, "page_number": page_number, "total": total} + pipelines.append(build_from_response(pipeline)) + return { + "results": pipelines, + "page_total": page_total, + "page_number": page_number, + "total": total, + } @classmethod - def create(cls, name: Text, pipeline: Union[Text, Dict], api_key: Optional[Text] = None) -> Pipeline: + def init(cls, name: Text, api_key: Optional[Text] = None) -> Pipeline: + """Initialize a new Pipeline + + Args: + name (Text): Pipeline Name + api_key (Optional[Text], optional): Team API Key to create the Pipeline. Defaults to None. + + Returns: + Pipeline: instance of the new pipeline + """ + if api_key is None: + api_key = config.TEAM_API_KEY + return Pipeline( + id="", + name=name, + api_key=api_key, + nodes=[], + links=[], + instance=None, + ) + + @classmethod + def create( + cls, + name: Text, + pipeline: Union[Text, Dict], + api_key: Optional[Text] = None, + ) -> Pipeline: """Draft Pipeline Creation Args: @@ -259,10 +293,17 @@ def create(cls, name: Text, pipeline: Union[Text, Dict], api_key: Optional[Text] if "functionType" in node and node["functionType"] == "AI": pipeline["nodes"][i]["functionType"] = pipeline["nodes"][i]["functionType"].lower() # prepare payload - payload = {"name": name, "status": "draft", "architecture": pipeline} + payload = { + "name": name, + "status": "draft", + "architecture": pipeline, + } url = urljoin(cls.backend_url, "sdk/pipelines") api_key = api_key if api_key is not None else config.TEAM_API_KEY - headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"} + headers = { + "Authorization": f"Token {api_key}", + "Content-Type": "application/json", + } logging.info(f"Start service for POST Create Pipeline - {url} - {headers} - {json.dumps(payload)}") r = _request_with_retry("post", url, headers=headers, json=payload) response = r.json() diff --git a/aixplain/factories/pipeline_factory/utils.py b/aixplain/factories/pipeline_factory/utils.py new file mode 100644 index 00000000..465e5e7f --- /dev/null +++ b/aixplain/factories/pipeline_factory/utils.py @@ -0,0 +1,100 @@ +__author__ = "aixplain" +import logging + +from aixplain.enums import DataType +import aixplain.utils.config as config +from aixplain.modules.pipeline import Pipeline +from aixplain.modules.pipeline.designer import ( + Input, + Output, + AssetNode, + Decision, + Router, + Route, + Script, + Link, +) +from typing import Dict + + +def build_from_response(response: Dict, load_architecture: bool = False) -> Pipeline: + """Converts response Json to 'Pipeline' object + + Args: + response (Dict): Json from API + 
load_architecture (bool, optional): If True, the architecture will be loaded. Defaults to False.
+
+    Returns:
+        Pipeline: Converted 'Pipeline' object
+    """
+    if "api_key" not in response:
+        response["api_key"] = config.TEAM_API_KEY
+
+    # instantiating pipeline generic info
+    pipeline = Pipeline(response["id"], response["name"], response["api_key"])
+    if load_architecture is True:
+        try:
+            # instantiating nodes
+            for node_json in response["nodes"]:
+                logging.debug(node_json)
+                if node_json["type"].lower() == "input":
+                    node = Input(
+                        data=node_json["data"] if "data" in node_json else None,
+                        data_types=[DataType(dt) for dt in node_json["dataType"]],
+                    )
+                elif node_json["type"].lower() == "asset":
+                    node = AssetNode(asset_id=node_json["assetId"])
+                elif node_json["type"].lower() == "segmentor":
+                    raise NotImplementedError()
+                elif node_json["type"].lower() == "reconstructor":
+                    raise NotImplementedError()
+                elif node_json["type"].lower() == "decision":
+                    node = Decision(routes=[Route(**route) for route in node_json["routes"]])
+                elif node_json["type"].lower() == "router":
+                    node = Router(routes=[Route(**route) for route in node_json["routes"]])
+                elif node_json["type"].lower() == "script":
+                    node = Script(fileId=node_json["fileId"])
+                elif node_json["type"].lower() == "output":
+                    node = Output()
+
+                if "inputValues" in node_json:
+                    [
+                        node.inputs.create_param(
+                            data_type=DataType(input_param["dataType"]) if "dataType" in input_param else None,
+                            code=input_param["code"],
+                            value=input_param["value"] if "value" in input_param else None,
+                            is_required=input_param["isRequired"] if "isRequired" in input_param else False,
+                        )
+                        for input_param in node_json["inputValues"]
+                        if input_param["code"] not in node.inputs
+                    ]
+                if "outputValues" in node_json:
+                    [
+                        node.outputs.create_param(
+                            data_type=DataType(output_param["dataType"]) if "dataType" in output_param else None,
+                            code=output_param["code"],
+                            value=output_param["value"] if "value" in output_param else None,
+                            is_required=output_param["isRequired"] if "isRequired" in output_param else False,
+                        )
+                        for output_param in node_json["outputValues"]
+                        if output_param["code"] not in node.outputs
+                    ]
+                node.number = node_json["number"]
+                node.label = node_json["label"]
+                pipeline.add_node(node)
+
+            # instantiating links
+            for link_json in response["links"]:
+                for param_mapping in link_json["paramMapping"]:
+                    link = Link(
+                        from_node=pipeline.get_node(link_json["from"]),
+                        to_node=pipeline.get_node(link_json["to"]),
+                        from_param=param_mapping["from"],
+                        to_param=param_mapping["to"],
+                    )
+                    pipeline.add_link(link)
+        except Exception as e:
+            logging.warning("Error loading pipeline architecture: %s", e)
+            pipeline.nodes = []
+            pipeline.links = []
+    return pipeline
diff --git a/aixplain/factories/script_factory.py b/aixplain/factories/script_factory.py
new file mode 100644
index 00000000..35789561
--- /dev/null
+++ b/aixplain/factories/script_factory.py
@@ -0,0 +1,31 @@
+import os
+import json
+from typing import Tuple
+
+import requests
+
+from aixplain.utils import config
+
+
+class ScriptFactory:
+
+    @classmethod
+    def upload_script(cls, script_path: str) -> Tuple[str, str]:
+        try:
+            url = f"{config.BACKEND_URL}/sdk/pipelines/script"
+            headers = {"Authorization": f"Token {config.TEAM_API_KEY}"}
+            r = requests.post(
+                url, headers=headers, files={"file": open(script_path, "rb")}
+            )
+            if 200 <= r.status_code < 300:
+                response = r.json()
+            else:
+                raise Exception()
+        except Exception:
+            response = {"fileId": ""}
+
+        # get metadata info
+        fname = 
os.path.splitext(os.path.basename(script_path))[0] + file_size_kb = int(os.path.getsize(script_path) / 1024) + metadata = json.dumps({"name": fname, "size": file_size_kb}) + return response["fileId"], metadata diff --git a/aixplain/modules/pipeline/__init__.py b/aixplain/modules/pipeline/__init__.py new file mode 100644 index 00000000..d2381c3a --- /dev/null +++ b/aixplain/modules/pipeline/__init__.py @@ -0,0 +1,3 @@ +from .pipeline import Pipeline + +__all__ = ["Pipeline"] diff --git a/aixplain/modules/pipeline.py b/aixplain/modules/pipeline/asset.py similarity index 68% rename from aixplain/modules/pipeline.py rename to aixplain/modules/pipeline/asset.py index ed131018..ce168036 100644 --- a/aixplain/modules/pipeline.py +++ b/aixplain/modules/pipeline/asset.py @@ -24,6 +24,7 @@ import time import json import os +import uuid import logging from aixplain.modules.asset import Asset from aixplain.utils import config @@ -66,13 +67,20 @@ def __init__( version (Text, optional): version of the pipeline. Defaults to "1.0". **additional_info: Any additional Pipeline info to be saved """ + if not name: + raise ValueError("Pipeline name is required") + super().__init__(id, name, "", supplier, version) self.api_key = api_key self.url = f"{url}/assets/pipeline/execution/run" self.additional_info = additional_info def __polling( - self, poll_url: Text, name: Text = "pipeline_process", wait_time: float = 1.0, timeout: float = 20000.0 + self, + poll_url: Text, + name: Text = "pipeline_process", + wait_time: float = 1.0, + timeout: float = 20000.0, ) -> Dict: """Keeps polling the platform to check whether an asynchronous call is done. @@ -93,7 +101,9 @@ def __polling( while not completed and (end - start) < timeout: try: response_body = self.poll(poll_url, name=name) - logging.debug(f"Polling for Pipeline: Status of polling for {name} : {response_body}") + logging.debug( + f"Polling for Pipeline: Status of polling for {name} : {response_body}" + ) completed = response_body["completed"] end = time.time() @@ -102,12 +112,18 @@ def __polling( if wait_time < 60: wait_time *= 1.1 except Exception: - logging.error(f"Polling for Pipeline: polling for {name} : Continue") + logging.error( + f"Polling for Pipeline: polling for {name} : Continue" + ) if response_body and response_body["status"] == "SUCCESS": try: - logging.debug(f"Polling for Pipeline: Final status of polling for {name} : SUCCESS - {response_body}") + logging.debug( + f"Polling for Pipeline: Final status of polling for {name} : SUCCESS - {response_body}" + ) except Exception: - logging.error(f"Polling for Pipeline: Final status of polling for {name} : ERROR - {response_body}") + logging.error( + f"Polling for Pipeline: Final status of polling for {name} : ERROR - {response_body}" + ) else: logging.error( f"Polling for Pipeline: Final status of polling for {name} : No response in {timeout} seconds - {response_body}" @@ -125,11 +141,16 @@ def poll(self, poll_url: Text, name: Text = "pipeline_process") -> Dict: Dict: response obtained by polling call """ - headers = {"x-api-key": self.api_key, "Content-Type": "application/json"} + headers = { + "x-api-key": self.api_key, + "Content-Type": "application/json", + } r = _request_with_retry("get", poll_url, headers=headers) try: resp = r.json() - logging.info(f"Single Poll for Pipeline: Status of polling for {name} : {resp}") + logging.info( + f"Single Poll for Pipeline: Status of polling for {name} : {resp}" + ) except Exception: resp = {"status": "FAILED"} return resp @@ -158,23 +179,35 @@ def run( """ 
start = time.time() try: - response = self.run_async(data, data_asset=data_asset, name=name, **kwargs) + response = self.run_async( + data, data_asset=data_asset, name=name, **kwargs + ) if response["status"] == "FAILED": end = time.time() response["elapsed_time"] = end - start return response poll_url = response["url"] end = time.time() - response = self.__polling(poll_url, name=name, timeout=timeout, wait_time=wait_time) + response = self.__polling( + poll_url, name=name, timeout=timeout, wait_time=wait_time + ) return response except Exception as e: error_message = f"Error in request for {name}: {str(e)}" logging.error(error_message) logging.exception(error_message) end = time.time() - return {"status": "FAILED", "error": error_message, "elapsed_time": end - start} + return { + "status": "FAILED", + "error": error_message, + "elapsed_time": end - start, + } - def __prepare_payload(self, data: Union[Text, Dict], data_asset: Optional[Union[Text, Dict]] = None) -> Dict: + def __prepare_payload( + self, + data: Union[Text, Dict], + data_asset: Optional[Union[Text, Dict]] = None, + ) -> Dict: """Prepare pipeline execution payload, validating the input data Args: @@ -184,7 +217,11 @@ def __prepare_payload(self, data: Union[Text, Dict], data_asset: Optional[Union[ Returns: Dict: pipeline execution payload """ - from aixplain.factories import CorpusFactory, DatasetFactory, FileFactory + from aixplain.factories import ( + CorpusFactory, + DatasetFactory, + FileFactory, + ) # if an input data asset is provided, just handle the data if data_asset is None: @@ -203,7 +240,10 @@ def __prepare_payload(self, data: Union[Text, Dict], data_asset: Optional[Union[ try: payload = json.loads(data) if isinstance(payload, dict) is False: - if isinstance(payload, int) is True or isinstance(payload, float) is True: + if ( + isinstance(payload, int) is True + or isinstance(payload, float) is True + ): payload = str(payload) payload = {"data": payload} except Exception: @@ -233,15 +273,33 @@ def __prepare_payload(self, data: Union[Text, Dict], data_asset: Optional[Union[ try: dasset = CorpusFactory.get(str(data_asset[node_label])) asset_payload["dataAsset"]["corpus_id"] = dasset.id - if len([d for d in dasset.data if d.id == data[node_label]]) > 0: + if ( + len( + [ + d + for d in dasset.data + if d.id == data[node_label] + ] + ) + > 0 + ): data_found = True except Exception: try: - dasset = DatasetFactory.get(str(data_asset[node_label])) + dasset = DatasetFactory.get( + str(data_asset[node_label]) + ) asset_payload["dataAsset"]["dataset_id"] = dasset.id if ( - len([dfield for dfield in dasset.source_data if dasset.source_data[dfield].id == data[node_label]]) + len( + [ + dfield + for dfield in dasset.source_data + if dasset.source_data[dfield].id + == data[node_label] + ] + ) > 0 ): data_found = True @@ -274,7 +332,11 @@ def __prepare_payload(self, data: Union[Text, Dict], data_asset: Optional[Union[ return payload def run_async( - self, data: Union[Text, Dict], data_asset: Optional[Union[Text, Dict]] = None, name: Text = "pipeline_process", **kwargs + self, + data: Union[Text, Dict], + data_asset: Optional[Union[Text, Dict]] = None, + name: Text = "pipeline_process", + **kwargs ) -> Dict: """Runs asynchronously a pipeline call. 
@@ -287,19 +349,26 @@ def run_async( Returns: Dict: polling URL in response """ - headers = {"x-api-key": self.api_key, "Content-Type": "application/json"} + headers = { + "x-api-key": self.api_key, + "Content-Type": "application/json", + } payload = self.__prepare_payload(data=data, data_asset=data_asset) payload.update(kwargs) payload = json.dumps(payload) call_url = f"{self.url}/{self.id}" logging.info(f"Start service for {name} - {call_url} - {payload}") - r = _request_with_retry("post", call_url, headers=headers, data=payload) + r = _request_with_retry( + "post", call_url, headers=headers, data=payload + ) resp = None try: resp = r.json() - logging.info(f"Result of request for {name} - {r.status_code} - {resp}") + logging.info( + f"Result of request for {name} - {r.status_code} - {resp}" + ) poll_url = resp["url"] response = {"status": "IN_PROGRESS", "url": poll_url} @@ -309,7 +378,12 @@ def run_async( response["error"] = resp return response - def update(self, pipeline: Union[Text, Dict], save_as_asset: bool = False, api_key: Optional[Text] = None): + def update( + self, + pipeline: Union[Text, Dict], + save_as_asset: bool = False, + api_key: Optional[Text] = None, + ): """Update Pipeline Args: @@ -331,16 +405,27 @@ def update(self, pipeline: Union[Text, Dict], save_as_asset: bool = False, api_k for i, node in enumerate(pipeline["nodes"]): if "functionType" in node and node["functionType"] == "AI": - pipeline["nodes"][i]["functionType"] = pipeline["nodes"][i]["functionType"].lower() + pipeline["nodes"][i]["functionType"] = pipeline["nodes"][ + i + ]["functionType"].lower() # prepare payload status = "draft" if save_as_asset is True: status = "onboarded" - payload = {"name": self.name, "status": status, "architecture": pipeline} + payload = { + "name": self.name, + "status": status, + "architecture": pipeline, + } url = urljoin(config.BACKEND_URL, f"sdk/pipelines/{self.id}") api_key = api_key if api_key is not None else config.TEAM_API_KEY - headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"} - logging.info(f"Start service for PUT Update Pipeline - {url} - {headers} - {json.dumps(payload)}") + headers = { + "Authorization": f"Token {api_key}", + "Content-Type": "application/json", + } + logging.info( + f"Start service for PUT Update Pipeline - {url} - {headers} - {json.dumps(payload)}" + ) r = _request_with_retry("put", url, headers=headers, json=payload) response = r.json() logging.info(f"Pipeline {response['id']} Updated.") @@ -351,8 +436,13 @@ def delete(self) -> None: """Delete Dataset service""" try: url = urljoin(config.BACKEND_URL, f"sdk/pipelines/{self.id}") - headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} - logging.info(f"Start service for DELETE Pipeline - {url} - {headers}") + headers = { + "Authorization": f"Token {config.TEAM_API_KEY}", + "Content-Type": "application/json", + } + logging.info( + f"Start service for DELETE Pipeline - {url} - {headers}" + ) r = _request_with_retry("delete", url, headers=headers) if r.status_code != 200: raise Exception() @@ -360,3 +450,54 @@ def delete(self) -> None: message = "Pipeline Deletion Error: Make sure the pipeline exists and you are the owner." logging.error(message) raise Exception(f"{message}") + + def save( + self, save_as_asset: bool = False, api_key: Optional[Text] = None + ): + """Save Pipeline + + Args: + save_as_asset (bool, optional): Save as asset (True) or draft (False). Defaults to False. 
+            api_key (Optional[Text], optional): Team API Key to save the Pipeline. Defaults to None.
+
+        Raises:
+            Exception: If the pipeline could not be saved.
+        """
+        try:
+            pipeline = self.to_dict()
+
+            for i, node in enumerate(pipeline["nodes"]):
+                if "functionType" in node and node["functionType"] == "AI":
+                    pipeline["nodes"][i]["functionType"] = pipeline["nodes"][
+                        i
+                    ]["functionType"].lower()
+            # prepare payload
+            status = "draft"
+            if save_as_asset is True:
+                status = "onboarded"
+            payload = {
+                "name": self.name,
+                "status": status,
+                "architecture": pipeline,
+            }
+
+            if self.id != "":
+                method = "put"
+                url = urljoin(config.BACKEND_URL, f"sdk/pipelines/{self.id}")
+            else:
+                method = "post"
+                url = urljoin(config.BACKEND_URL, "sdk/pipelines")
+            api_key = api_key if api_key is not None else config.TEAM_API_KEY
+            headers = {
+                "Authorization": f"Token {api_key}",
+                "Content-Type": "application/json",
+            }
+            logging.info(
+                f"Start service for Save Pipeline - {url} - {headers} - {json.dumps(payload)}"
+            )
+            r = _request_with_retry(method, url, headers=headers, json=payload)
+            response = r.json()
+            self.id = response["id"]
+            logging.info(f"Pipeline {response['id']} Saved.")
+        except Exception as e:
+            raise Exception(e)
diff --git a/aixplain/modules/pipeline/default.py b/aixplain/modules/pipeline/default.py
new file mode 100644
index 00000000..b0499159
--- /dev/null
+++ b/aixplain/modules/pipeline/default.py
@@ -0,0 +1,17 @@
+from .asset import Pipeline as PipelineAsset
+from .designer import DesignerPipeline
+
+
+class DefaultPipeline(PipelineAsset, DesignerPipeline):
+
+    def __init__(self, *args, **kwargs):
+        PipelineAsset.__init__(self, *args, **kwargs)
+        DesignerPipeline.__init__(self)
+
+    def save(self, *args, **kwargs):
+        self.auto_infer()
+        self.validate()
+        super().save(*args, **kwargs)
+
+    def to_dict(self) -> dict:
+        return self.serialize()
diff --git a/aixplain/modules/pipeline/designer/README.md b/aixplain/modules/pipeline/designer/README.md
new file mode 100644
index 00000000..a1806868
--- /dev/null
+++ b/aixplain/modules/pipeline/designer/README.md
@@ -0,0 +1,194 @@
+# aiXplain SDK User Guide
+
+## Introduction
+
+The aiXplain SDK provides a programmatic API to create pipelines for building solutions on the aiXplain platform.
+
+## Minimal Example
+
+Here's a quick example to get you started:
+
+```python
+from aixplain.factories.pipeline_factory import PipelineFactory
+
+TRANSLATION_ASSET_ID = 'your-translation-asset-id'
+
+pipeline = PipelineFactory.init('Translation Pipeline')
+input_node = pipeline.input()
+translation_node = pipeline.translation(asset_id=TRANSLATION_ASSET_ID)
+
+input_node.link(translation_node, 'input', 'text')
+
+output_node = translation_node.use_output('data')
+
+pipeline.save()
+outputs = pipeline.run('This is example text to translate')
+
+print(outputs)
+```
+
+## Instantiating Nodes
+
+To create a pipeline and instantiate nodes, use the following code:
+
+```python
+from aixplain.factories.pipeline_factory import PipelineFactory
+from aixplain.modules.pipeline.designer import Input
+
+pipeline = PipelineFactory.init("My Pipeline")
+input_node = Input(*args, **kwargs)
+input_node.attach_to(pipeline)
+```
+
+Alternatively, add nodes to the pipeline using `add_node`:
+
+```python
+input_node = pipeline.add_node(Input(*args, **kwargs))
+```
+
+You can also pass the pipeline to the node constructor:
+
+```python
+input_node = Input(*args, pipeline=pipeline, **kwargs)
+```
+
+Or directly instantiate the node within the pipeline:
+
+```python
+input_node = pipeline.input(*args, **kwargs)
+```
+
+## Adding Output Nodes
+
+Each pipeline should have at least one input, asset, and output node. Add output nodes like any other node:
+
+```python
+translation_node = pipeline.translation(asset_id=TRANSLATION_ASSET_ID)
+output_node = pipeline.output(*args, **kwargs)
+translation_node.link(output_node, 'data', 'output')
+```
+
+For nodes implementing the `Outputable` mixin, use the shortcut syntax:
+
+```python
+output_node = translation_node.use_output('parameter_name_we_are_interested_in')
+```
+
+## Asset Nodes and Automatic Population
+
+Asset nodes are used to run models and should have an asset ID. Once instantiated, an asset node contains all model information and parameters, which are populated automatically by interacting with the aiXplain platform.
+
+```python
+translation_node = pipeline.translation(asset_id=TRANSLATION_ASSET_ID)
+print(translation_node.inputs)
+print(translation_node.outputs)
+```
+
+## Handling Parameters
+
+Parameters are accessed via the `inputs` and `outputs` attributes of the node, which behave as proxy objects for the parameters.
+
+```python
+print(translation_node.inputs.text)
+print(translation_node.outputs.data)
+```
+
+Add parameters to a node using `create_param` on the corresponding `inputs` or `outputs` attribute:
+
+```python
+translation_node.inputs.create_param('source_language', DataType.TEXT)
+translation_node.outputs.create_param('source_audio', DataType.AUDIO)
+```
+
+Alternatively, instantiate parameters directly using the `InputParam` or `OutputParam` classes:
+
+```python
+from aixplain.enums import DataType
+from aixplain.modules.pipeline.designer import InputParam, OutputParam
+
+source_language = InputParam(
+    code='source_language',
+    data_type=DataType.TEXT,
+    is_required=True,
+    node=translation_node
+)
+```
+
+Or add parameters explicitly:
+
+```python
+source_audio = OutputParam(data_type=DataType.AUDIO, code='source_audio')
+translation_node.outputs.add_param(source_audio)
+```
+
+If needed, any parameter value can be set directly, without linking nodes:
+
+```python
+translation_node.inputs.text = 'This is example text to translate'
+translation_node.inputs.source_language = 'en'
+```
+
+This will implicitly set the `value` attribute of the parameter object.
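+
+Under the hood, `inputs` and `outputs` are `ParamProxy` objects (see `designer/base.py`), so dictionary-style item access is equivalent to the attribute style above. A minimal sketch of the equivalence, reusing the `translation_node` from the previous examples:
+
+```python
+# both forms set the underlying Param object's `value` attribute
+translation_node.inputs['text'] = 'This is example text to translate'
+translation_node.inputs['text'].value = 'This is example text to translate'
+```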
+
+## Linking Nodes
+
+Link nodes to pass data between them using the `link` method. This method links the output of one node to the input of another on the specified parameters.
+
+Consider the following nodes:
+
+```python
+input_node = pipeline.input()
+translation_node = pipeline.translation(asset_id=TRANSLATION_ASSET_ID)
+```
+
+Link nodes together:
+
+```python
+input_node.link(translation_node, 'input', 'text')
+```
+
+Specify parameters explicitly:
+
+```python
+input_node.link(translation_node, from_param='input', to_param='text')
+```
+
+Or use parameter instances:
+
+```python
+input_node.link(translation_node, from_param=input_node.outputs.input, to_param=translation_node.inputs.text)
+```
+
+You can also link parameters directly if you find it more convenient:
+
+```python
+input_node.outputs.input.link(translation_node.inputs.text)
+```
+
+## Validating the Pipeline
+
+Use the `validate` method to ensure the pipeline is valid and ready to run. This method raises an exception if the pipeline has issues.
+
+```python
+pipeline.validate()
+```
+
+This method will check the following:
+ * The pipeline contains at least one input, asset, and output node
+ * All input nodes are linked out, all output nodes are linked in, and all other nodes are linked both in and out
+ * All links point to existing nodes and valid params on those nodes
+ * All required params are either set or linked
+ * All linked params have the same data type
+
+If any of these checks fail, a `ValueError` is raised describing the cause.
+
+## Save and Run the Pipeline
+
+Save the pipeline before running it. The `save` method implicitly calls the `validate` method. Use the `run` method to execute the pipeline with input data.
+
+```python
+pipeline.save() # Raises an exception if there are semantic issues
+outputs = pipeline.run('This is example text to translate')
+print(outputs)
+```
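+
+## Routing Inputs
+
+Input nodes also implement the `Routable` mixin, which can dispatch an input to different branches depending on its data type. The following is a minimal sketch based on `RoutableMixin.route` and the `router` shortcut in `designer/pipeline.py`; the asset IDs are placeholders, and the `speech_recognition` helper and its `source_audio` parameter are assumptions based on the function specs:
+
+```python
+input_node = pipeline.input()
+translation_node = pipeline.translation(asset_id=TRANSLATION_ASSET_ID)
+speech_recognition_node = pipeline.speech_recognition(asset_id=SPEECH_RECOGNITION_ASSET_ID)
+
+# route() creates a Router node and links input -> router -> each target param
+router = input_node.route(
+    translation_node.inputs.text,
+    speech_recognition_node.inputs.source_audio,
+)
+```
+
+This guide covers the basic usage of the programmatic API of the aiXplain SDK for creating and running pipelines. For more advanced features, refer to the code itself.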
\ No newline at end of file
diff --git a/aixplain/modules/pipeline/designer/__init__.py b/aixplain/modules/pipeline/designer/__init__.py
new file mode 100644
index 00000000..0bb56542
--- /dev/null
+++ b/aixplain/modules/pipeline/designer/__init__.py
@@ -0,0 +1,66 @@
+from .nodes import (
+    AssetNode,
+    Decision,
+    Script,
+    Input,
+    Output,
+    Route,
+    Router,
+    BaseReconstructor,
+    BaseSegmentor,
+)
+from .pipeline import DesignerPipeline
+from .base import (
+    Node,
+    Link,
+    Param,
+    ParamProxy,
+    InputParam,
+    OutputParam,
+    Inputs,
+    Outputs,
+    TI,
+    TO,
+)
+from .enums import (
+    ParamType,
+    RouteType,
+    Operation,
+    NodeType,
+    AssetType,
+    FunctionType,
+)
+from .mixins import LinkableMixin, OutputableMixin, RoutableMixin
+
+
+__all__ = [
+    "DesignerPipeline",
+    "AssetNode",
+    "Decision",
+    "Script",
+    "Input",
+    "Output",
+    "Route",
+    "Router",
+    "BaseReconstructor",
+    "BaseSegmentor",
+    "Node",
+    "Link",
+    "Param",
+    "ParamType",
+    "InputParam",
+    "OutputParam",
+    "RouteType",
+    "Operation",
+    "NodeType",
+    "AssetType",
+    "FunctionType",
+    "LinkableMixin",
+    "OutputableMixin",
+    "RoutableMixin",
+    "Inputs",
+    "Outputs",
+    "ParamProxy",
+    "TI",
+    "TO",
+]
diff --git a/aixplain/modules/pipeline/designer/base.py b/aixplain/modules/pipeline/designer/base.py
new file mode 100644
index 00000000..8bea73d6
--- /dev/null
+++ b/aixplain/modules/pipeline/designer/base.py
@@ -0,0 +1,375 @@
+from typing import (
+    List,
+    Union,
+    TYPE_CHECKING,
+    Generic,
+    TypeVar,
+    Type,
+    Optional,
+    Iterator,
+)
+
+from aixplain.enums import DataType
+from .enums import NodeType, ParamType
+
+
+if TYPE_CHECKING:
+    from .pipeline import DesignerPipeline
+
+TI = TypeVar("TI", bound="Inputs")
+TO = TypeVar("TO", bound="Outputs")
+
+
+class Serializable:
+    def serialize(self) -> dict:
+        raise NotImplementedError()
+
+
+class Param(Serializable):
+    """
+    Param class, this class will be used to create the parameters of the node.
+    """
+
+    code: str
+    param_type: ParamType
+    data_type: Optional[DataType] = None
+    value: Optional[str] = None
+    node: Optional["Node"] = None
+    link_: Optional["Link"] = None
+
+    def __init__(
+        self,
+        code: str,
+        data_type: Optional[DataType] = None,
+        value: Optional[str] = None,
+        node: Optional["Node"] = None,
+        param_type: Optional[ParamType] = None,
+    ):
+        self.code = code
+        self.data_type = data_type
+        self.value = value
+
+        # if subclasses do not set the param type, fall back to the constructor argument
+        self.param_type = getattr(self, "param_type", param_type)
+
+        if node:
+            self.attach_to(node)
+
+    def attach_to(self, node: "Node") -> "Param":
+        """
+        Attach the param to the node.
+        :param node: the node
+        :return: the param
+        """
+        assert not self.node, "Param already attached to a node"
+        assert self.param_type, "Param type not set"
+        if self.param_type == ParamType.INPUT:
+            node.inputs.add_param(self)
+        elif self.param_type == ParamType.OUTPUT:
+            node.outputs.add_param(self)
+        else:
+            raise ValueError("Invalid param type")
+        self.node = node
+        return self
+
+    def link(self, to_param: "Param") -> "Param":
+        """
+        Link the output of the param to the input of another param.
+        :param to_param: the input param
+        :return: the param
+        """
+        assert self.node, "Param not attached to a node"
+        assert to_param.param_type == ParamType.INPUT, "Invalid param type"
+        assert self in self.node.outputs, "Param not registered as output"
+        return to_param.back_link(self)
+
+    def back_link(self, from_param: "Param") -> "Param":
+        """
+        Link the input of the param to the output of another param.
+ :param from_param: the output param + :return: the param + """ + assert self.node, "Param not attached to a node" + assert from_param.param_type == ParamType.OUTPUT, "Invalid param type" + assert self.code in self.node.inputs, "Param not registered as input" + link = from_param.node.link(self.node, from_param, self) + self.link_ = link + from_param.link_ = link + return link + + def serialize(self) -> dict: + return { + "code": self.code, + "dataType": self.data_type, + "value": self.value, + } + + +class InputParam(Param): + + param_type: ParamType = ParamType.INPUT + is_required: bool = True + + def __init__(self, *args, is_required: bool = True, **kwargs): + super().__init__(*args, **kwargs) + self.is_required = is_required + + +class OutputParam(Param): + + param_type: ParamType = ParamType.OUTPUT + + +class Link(Serializable): + """ + Link class, this class will be used to link the output of the node to the + input of another node. + """ + + from_node: "Node" + to_node: "Node" + from_param: str + to_param: str + + pipeline: Optional["DesignerPipeline"] = None + + def __init__( + self, + from_node: "Node", + to_node: "Node", + from_param: Union[Param, str], + to_param: Union[Param, str], + pipeline: "DesignerPipeline" = None, + ): + + assert from_param in from_node.outputs, "Invalid from param" + assert to_param in to_node.inputs, "Invalid to param" + + if isinstance(from_param, Param): + from_param = from_param.code + if isinstance(to_param, Param): + to_param = to_param.code + + self.from_node = from_node + self.to_node = to_node + self.from_param = from_param + self.to_param = to_param + + if pipeline: + self.attach_to(pipeline) + + # self.validate() + self.auto_infer() + + def auto_infer(self): + from_param = self.from_node.outputs[self.from_param] + to_param = self.to_node.inputs[self.to_param] + + # if one of the data types is missing, infer the other one + data_type = from_param.data_type or to_param.data_type + from_param.data_type = data_type + to_param.data_type = data_type + + def infer_data_type(node): + from .nodes import Input, Output + + if isinstance(node, Input) or isinstance(node, Output): + if data_type and data_type not in node.data_types: + node.data_types.append(data_type) + + infer_data_type(self.from_node) + infer_data_type(self.to_node) + + def validate(self): + from_param = self.from_node.outputs[self.from_param] + to_param = self.to_node.inputs[self.to_param] + + # Should we check for data type mismatch? + if from_param.data_type and to_param.data_type: + if from_param.data_type != to_param.data_type: + raise ValueError( + f"Data type mismatch between {from_param.data_type} and {to_param.data_type}" + ) # noqa + + def attach_to(self, pipeline: "DesignerPipeline"): + """ + Attach the link to the pipeline. 
+ :param pipeline: the pipeline + """ + assert not self.pipeline, "Link already attached to a pipeline" + if not self.from_node.pipeline or self.from_node not in pipeline.nodes: + self.from_node.attach_to(pipeline) + if not self.to_node.pipeline or self.to_node not in pipeline.nodes: + self.to_node.attach_to(pipeline) + + self.pipeline = pipeline + self.pipeline.links.append(self) + return self + + def serialize(self) -> dict: + assert self.from_node.number is not None, "From node number not set" + assert self.to_node.number is not None, "To node number not set" + return { + "from": self.from_node.number, + "to": self.to_node.number, + "paramMapping": [ + { + "from": self.from_param, + "to": self.to_param, + } + ], + } + + +class ParamProxy(Serializable): + + node: "Node" + + def __init__(self, node: "Node", *args, **kwargs): + super().__init__(*args, **kwargs) + self.node = node + self._params = [] + + def add_param(self, param: Param) -> None: + # check if param already registered + if param in self: + raise ValueError( + f"Parameter with code '{param.code}' already exists." + ) + self._params.append(param) + # also set attribute on the node dynamically if there's no + # any attribute with the same name + if not hasattr(self, param.code): + setattr(self, param.code, param) + + def _create_param( + self, code: str, data_type: DataType = None, value: any = None + ) -> Param: + raise NotImplementedError() + + def create_param( + self, + code: str, + data_type: DataType = None, + value: any = None, + is_required: bool = False, + ) -> Param: + param = self._create_param(code, data_type, value) + param.is_required = is_required + self.add_param(param) + param.node = self.node + return param + + def __getitem__(self, code: str) -> Param: + for param in self._params: + if param.code == code: + return param + raise KeyError(f"Parameter with code '{code}' not found.") + + def __setitem__(self, code: str, value: str) -> None: + # set param value on set item to avoid setting it manually + self[code].value = value + + def __setattr__(self, name: str, value: any) -> None: + # set param value on attribute assignment to avoid setting it manually + if isinstance(value, str) and hasattr(self, name): + self[name].value = value + else: + super().__setattr__(name, value) + + def __contains__(self, param: Union[str, Param]) -> bool: + code = param if isinstance(param, str) else param.code + return any(param.code == code for param in self._params) + + def __iter__(self) -> Iterator[Param]: + return iter(self._params) + + def __len__(self) -> int: + return len(self._params) + + def serialize(self) -> List[dict]: + return [param.serialize() for param in self._params] + + +class Inputs(ParamProxy): + def _create_param( + self, + code: str, + data_type: DataType = None, + value: any = None, + is_required: bool = False, + ) -> InputParam: + return InputParam( + code=code, + data_type=data_type, + value=value, + is_required=is_required, + ) + + +class Outputs(ParamProxy): + def _create_param( + self, code: str, data_type: DataType = None, value: any = None + ) -> OutputParam: + return OutputParam(code=code, data_type=data_type, value=value) + + +class Node(Generic[TI, TO], Serializable): + """ + Node class is the base class for all the nodes in the pipeline. This class + will be used to create the nodes and link them together. 
+ """ + + number: Optional[int] = None + label: Optional[str] = None + type: Optional[NodeType] = None + + inputs: Optional[TI] = None + outputs: Optional[TO] = None + inputs_class: Optional[Type[TI]] = Inputs + outputs_class: Optional[Type[TO]] = Outputs + pipeline: Optional["DesignerPipeline"] = None + + def __init__( + self, + pipeline: "DesignerPipeline" = None, + number: Optional[int] = None, + label: Optional[str] = None, + ): + self.inputs = self.inputs_class(node=self) + self.outputs = self.outputs_class(node=self) + self.number = number + self.label = label + + if pipeline: + self.attach_to(pipeline) + + def attach_to(self, pipeline: "DesignerPipeline"): + """ + Attach the node to the pipeline. + :param pipeline: the pipeline + """ + assert not self.pipeline, "Node already attached to a pipeline" + assert ( + self not in pipeline.nodes + ), "Node already attached to a pipeline" + assert self.type, "Node type not set" + + self.pipeline = pipeline + if self.number is None: + self.number = len(pipeline.nodes) + if self.label is None: + self.label = f"{self.type.value}(ID={self.number})" + + assert not pipeline.get_node(self.number), "Node number already exists" + pipeline.nodes.append(self) + return self + + def serialize(self) -> dict: + return { + "number": self.number, + "label": self.label, + "type": self.type.value, + "inputValues": self.inputs.serialize(), + "outputValues": self.outputs.serialize(), + } diff --git a/aixplain/modules/pipeline/designer/enums.py b/aixplain/modules/pipeline/designer/enums.py new file mode 100644 index 00000000..4c044dba --- /dev/null +++ b/aixplain/modules/pipeline/designer/enums.py @@ -0,0 +1,43 @@ +from enum import Enum + + +class RouteType(str, Enum): + CHECK_TYPE = "checkType" + CHECK_VALUE = "checkValue" + + +class Operation(str, Enum): + GREATER_THAN = "greaterThan" + GREATER_THAN_OR_EQUAL = "greaterThanOrEqual" + LESS_THAN = "lessThan" + LESS_THAN_OR_EQUAL = "lessThanOrEqual" + EQUAL = "equal" + DIFFERENT = "different" + CONTAIN = "contain" + NOT_CONTAIN = "notContain" + + +class NodeType(str, Enum): + ASSET = "ASSET" + INPUT = "INPUT" + OUTPUT = "OUTPUT" + SCRIPT = "SCRIPT" + SEGMENTOR = "SEGMENT" + RECONSTRUCTOR = "RECONSTRUCT" + ROUTER = "ROUTER" + DECISION = "DECISION" + + +class AssetType(str, Enum): + MODEL = "MODEL" + + +class FunctionType(str, Enum): + AI = "AI" + SEGMENTOR = "SEGMENTOR" + RECONSTRUCTOR = "RECONSTRUCTOR" + + +class ParamType: + INPUT = "INPUT" + OUTPUT = "OUTPUT" diff --git a/aixplain/modules/pipeline/designer/mixins.py b/aixplain/modules/pipeline/designer/mixins.py new file mode 100644 index 00000000..e5aad3c4 --- /dev/null +++ b/aixplain/modules/pipeline/designer/mixins.py @@ -0,0 +1,85 @@ +from typing import Union +from .base import Node, Link, Param + + +class LinkableMixin: + """ + Linkable mixin class, this class will be used to link the output of the + node to the input of another node. + + This class will be used to link the output of the node to the input of + another node. + """ + + def link( + self, + to_node: Node, + from_param: Union[str, Param], + to_param: Union[str, Param], + ) -> Link: + """ + Link the output of the node to the input of another node. This method + will link the output of the node to the input of another node. 
+ + :param to_node: the node to link to the output + :param from_param: the output parameter or the code of the output + parameter + :param to_param: the input parameter or the code of the input parameter + :return: the link + """ + return Link( + pipeline=self.pipeline, + from_node=self, + to_node=to_node, + from_param=from_param, + to_param=to_param, + ) + + +class RoutableMixin: + """ + Routable mixin class, this class will be used to route the input data to + different nodes based on the input data type. + """ + + def route(self, *params: Param) -> Node: + """ + Route the input data to different nodes based on the input data type. + This method will automatically link the input data to the output data + of the node. + + :param params: the output parameters + :return: the router node + """ + assert self.pipeline, "Node not attached to a pipeline" + + router = self.pipeline.router( + [(param.data_type, param.node) for param in params] + ) + self.outputs.input.link(router.inputs.input) + for param in params: + router.outputs.input.link(param) + return router + + +class OutputableMixin: + """ + Outputable mixin class, this class will be used to link the output of the + node to the output node of the pipeline. + """ + + def use_output(self, param: Union[str, Param]) -> Node: + """ + Use the output of the node as the output of the pipeline. + This method will automatically link the output of the node to the + output node of the pipeline. + + :param param: the output parameter or the code of the output parameter + :return: the output node + """ + assert self.pipeline, "Node not attached to a pipeline" + output = self.pipeline.output() + if isinstance(param, str): + param = self.outputs[param] + param.link(output.inputs.output) + return output diff --git a/aixplain/modules/pipeline/designer/nodes.py b/aixplain/modules/pipeline/designer/nodes.py new file mode 100644 index 00000000..22152239 --- /dev/null +++ b/aixplain/modules/pipeline/designer/nodes.py @@ -0,0 +1,464 @@ +from typing import List, Union, Type, TYPE_CHECKING, Optional + +from aixplain.modules import Model +from aixplain.enums import DataType + +from .enums import ( + NodeType, + FunctionType, + RouteType, + Operation, + AssetType, +) +from .base import ( + Node, + Link, + Param, + InputParam, + OutputParam, + TI, + TO, + Inputs, + Outputs, + Serializable, +) +from .mixins import LinkableMixin, OutputableMixin, RoutableMixin + +if TYPE_CHECKING: + from .pipeline import DesignerPipeline + + +class AssetNode(Node[TI, TO], LinkableMixin, OutputableMixin): + """ + Asset node class, this node will be used to fetch the asset from the + aixplain platform and use it in the pipeline. + + `assetId` is required and will be used to fetch the asset from the + aixplain platform. + + Input and output parameters will be automatically added based on the + asset function spec. 
+    """
+
+    asset_id: Union[Model, str] = None
+    function: str = None
+    supplier: str = None
+    version: str = None
+    assetType: AssetType = AssetType.MODEL
+    functionType: FunctionType = FunctionType.AI
+
+    type: NodeType = NodeType.ASSET
+
+    def __init__(
+        self,
+        asset_id: Union[Model, str] = None,
+        supplier: str = None,
+        version: str = None,
+        pipeline: "DesignerPipeline" = None,
+    ):
+        super().__init__(pipeline=pipeline)
+        self.asset_id = asset_id
+        self.supplier = supplier
+        self.version = version
+
+        if self.asset_id:
+            self.populate_asset()
+
+    def populate_asset(self):
+        from aixplain.factories.model_factory import ModelFactory
+
+        if isinstance(self.asset_id, str):
+            self.asset = ModelFactory.get(self.asset_id)
+        elif isinstance(self.asset_id, Model):
+            self.asset = self.asset_id
+            self.asset_id = self.asset_id.id
+        else:
+            raise ValueError("asset_id should be a string or a Model instance")
+
+        try:
+            self.supplier = self.asset.supplier.value["code"]
+        except Exception:
+            self.supplier = str(self.asset.supplier)
+
+        self.version = self.asset.version
+
+        if self.function:
+            if self.asset.function.value != self.function:
+                raise ValueError(
+                    f"Function {self.function} is not supported by asset {self.asset_id}"
+                )  # noqa
+        else:
+            self.function = self.asset.function.value
+            self._auto_populate_params()
+
+        self._auto_set_params()
+
+    def _auto_populate_params(self):
+        from aixplain.enums.function import FunctionInputOutput
+
+        spec = FunctionInputOutput[self.asset.function.value]["spec"]
+        for item in spec["params"]:
+            self.inputs.create_param(
+                code=item["code"],
+                data_type=item["dataType"],
+                is_required=item["required"],
+            )
+
+        for item in spec["output"]:
+            self.outputs.create_param(
+                code=item["code"],
+                data_type=item["dataType"],
+            )
+
+    def _auto_set_params(self):
+        for k, v in self.asset.additional_info["parameters"].items():
+            if isinstance(v, list):
+                self.inputs[k] = v[0]
+            else:
+                self.inputs[k] = v
+
+    def serialize(self) -> dict:
+        obj = super().serialize()
+        obj["function"] = self.function
+        obj["assetId"] = self.asset_id
+        obj["supplier"] = self.supplier
+        obj["version"] = self.version
+        obj["assetType"] = self.assetType
+        obj["functionType"] = self.functionType
+        obj["type"] = self.type
+        return obj
+
+
+class InputInputs(Inputs):
+    pass
+
+
+class InputOutputs(Outputs):
+    input: OutputParam = None
+
+    def __init__(self, node: Node):
+        super().__init__(node)
+        self.input = self.create_param("input")
+
+
+class Input(Node[InputInputs, InputOutputs], LinkableMixin, RoutableMixin):
+    """
+    Input node class, this node will be used to input the data to the
+    pipeline.
+
+    Input nodes have only one output parameter called `input`.
+
+    `data` is a special convenience parameter that will be uploaded to the
+    aixplain platform and the link will be passed as the input to the node.
+    """
+
+    data_types: Optional[List[DataType]] = None
+    data: Optional[str] = None
+    type: NodeType = NodeType.INPUT
+    inputs_class: Type[TI] = InputInputs
+    outputs_class: Type[TO] = InputOutputs
+
+    def __init__(
+        self,
+        data: Optional[str] = None,
+        data_types: Optional[List[DataType]] = None,
+        pipeline: "DesignerPipeline" = None,
+    ):
+        from aixplain.factories.file_factory import FileFactory
+
+        super().__init__(pipeline=pipeline)
+        self.data_types = data_types or []
+        self.data = data
+
+        if self.data:
+            self.data = FileFactory.to_link(self.data, is_temp=True)
+
+    def serialize(self) -> dict:
+        obj = super().serialize()
+        obj["data"] = self.data
+        obj["dataType"] = self.data_types
+        return obj
+
+
+class OutputInputs(Inputs):
+    output: InputParam = None
+
+    def __init__(self, node: Node):
+        super().__init__(node)
+        self.output = self.create_param("output")
+
+
+class OutputOutputs(Outputs):
+    pass
+
+
+class Output(Node[OutputInputs, OutputOutputs]):
+    """
+    Output node class, this node will be used to output the result of the
+    pipeline.
+
+    Output nodes have only one input parameter called `output`.
+    """
+
+    data_types: Optional[List[DataType]] = None
+    type: NodeType = NodeType.OUTPUT
+    inputs_class: Type[TI] = OutputInputs
+    outputs_class: Type[TO] = OutputOutputs
+
+    def __init__(
+        self,
+        data_types: Optional[List[DataType]] = None,
+        pipeline: "DesignerPipeline" = None,
+    ):
+        super().__init__(pipeline=pipeline)
+        self.data_types = data_types or []
+
+    def serialize(self) -> dict:
+        obj = super().serialize()
+        obj["dataType"] = self.data_types
+        return obj
+
+
+class Script(Node[TI, TO], LinkableMixin, OutputableMixin):
+    """
+    Script node class, this node will be used to run a script on the input
+    data.
+
+    `script_path` is a special convenience parameter: the script file will be
+    uploaded to the aixplain platform and the resulting file ID will be used
+    as the input to the node.
+    """
+
+    fileId: Optional[str] = None
+    script_path: Optional[str] = None
+    type: NodeType = NodeType.SCRIPT
+
+    def __init__(
+        self,
+        pipeline: "DesignerPipeline" = None,
+        script_path: Optional[str] = None,
+        fileId: Optional[str] = None,
+    ):
+        from aixplain.factories.script_factory import ScriptFactory
+
+        super().__init__(pipeline=pipeline)
+
+        assert script_path or fileId, "script_path or fileId is required"
+
+        if not fileId:
+            # upload_script returns a (fileId, metadata) tuple; keep only the file ID
+            self.fileId, _ = ScriptFactory.upload_script(script_path)
+        else:
+            self.fileId = fileId
+
+    def serialize(self) -> dict:
+        obj = super().serialize()
+        obj["fileId"] = self.fileId
+        return obj
+
+
+class Route(Serializable):
+    """
+    Route class, this class will be used to route the input data to different
+    nodes based on the input data type.
+    """
+
+    value: DataType
+    path: List[Union[Node, int]]
+    operation: Operation
+    type: RouteType
+
+    def __init__(
+        self,
+        value: DataType,
+        path: List[Union[Node, int]],
+        operation: Operation,
+        type: RouteType,
+    ):
+        """
+        Initialize the route, converting any nodes in the path to node
+        numbers.
+ """ + self.value = value + self.path = path + self.operation = operation + self.type = type + + if not self.path: + raise ValueError("Path is not valid, should be a list of nodes") + + # convert nodes to node numbers if they are nodes + self.path = [ + node.number if isinstance(node, Node) else node + for node in self.path + ] + + def serialize(self) -> dict: + return { + "value": self.value, + "path": self.path, + "operation": self.operation, + "type": self.type, + } + + +class RouterInputs(Inputs): + input: InputParam = None + + def __init__(self, node: Node): + super().__init__(node) + self.input = self.create_param("input") + + +class RouterOutputs(Outputs): + input: OutputParam = None + + def __init__(self, node: Node): + super().__init__(node) + self.input = self.create_param("input") + + +class Router(Node[RouterInputs, RouterOutputs], LinkableMixin): + """ + Router node class, this node will be used to route the input data to + different nodes based on the input data type. + """ + + routes: List[Route] + type: NodeType = NodeType.ROUTER + inputs_class: Type[TI] = RouterInputs + outputs_class: Type[TO] = RouterOutputs + + def __init__( + self, routes: List[Route], pipeline: "DesignerPipeline" = None + ): + super().__init__(pipeline=pipeline) + self.routes = routes + + def serialize(self) -> dict: + obj = super().serialize() + obj["routes"] = [route.serialize() for route in self.routes] + return obj + + +class DecisionInputs(Inputs): + comparison: InputParam = None + passthrough: InputParam = None + + def __init__(self, node: Node): + super().__init__(node) + self.comparison = self.create_param("comparison") + self.passthrough = self.create_param("passthrough") + + +class DecisionOutputs(Outputs): + input: OutputParam = None + + def __init__(self, node: Node): + super().__init__(node) + self.input = self.create_param("input") + + +class Decision(Node[DecisionInputs, DecisionOutputs], LinkableMixin): + """ + Decision node class, this node will be used to make decisions based on + the input data. + """ + + routes: List[Route] + type: NodeType = NodeType.DECISION + inputs_class: Type[TI] = DecisionInputs + outputs_class: Type[TO] = DecisionOutputs + + def __init__( + self, routes: List[Route], pipeline: "DesignerPipeline" = None + ): + super().__init__(pipeline=pipeline) + self.routes = routes + + def link( + self, + to_node: Node, + from_param: Union[str, Param], + to_param: Union[str, Param], + ) -> Link: + link = super().link(to_node, from_param, to_param) + self.outputs.input.data_type = self.inputs.passthrough.data_type + return link + + def serialize(self) -> dict: + obj = super().serialize() + obj["routes"] = [route.serialize() for route in self.routes] + return obj + + +class BaseSegmentor(AssetNode[TI, TO]): + """ + Segmentor node class, this node will be used to segment the input data + into smaller fragments for much easier and efficient processing. + """ + + type: NodeType = NodeType.SEGMENTOR + functionType: FunctionType = FunctionType.SEGMENTOR + + +class SegmentorInputs(Inputs): + pass + + +class SegmentorOutputs(Outputs): + audio: OutputParam = None + + def __init__(self, node: Node): + super().__init__(node) + self.audio = self.create_param("audio") + + +class BareSegmentor(BaseSegmentor[SegmentorInputs, SegmentorOutputs]): + """ + Segmentor node class, this node will be used to segment the input data + into smaller fragments for much easier and efficient processing. 
+ """ + + type: NodeType = NodeType.SEGMENTOR + functionType: FunctionType = FunctionType.SEGMENTOR + inputs_class: Type[TI] = SegmentorInputs + outputs_class: Type[TO] = SegmentorOutputs + + +class BaseReconstructor(AssetNode[TI, TO]): + """ + Reconstructor node class, this node will be used to reconstruct the + output of the segmented lines of execution. + """ + + type: NodeType = NodeType.RECONSTRUCTOR + functionType: FunctionType = FunctionType.RECONSTRUCTOR + + +class ReconstructorInputs(Inputs): + data: InputParam = None + + def __init__(self, node: Node): + super().__init__(node) + self.data = self.create_param("data") + + +class ReconstructorOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node: Node): + super().__init__(node) + self.data = self.create_param("data") + + +class BareReconstructor( + BaseReconstructor[ReconstructorInputs, ReconstructorOutputs] +): + """ + Reconstructor node class, this node will be used to reconstruct the + output of the segmented lines of execution. + """ + + type: NodeType = NodeType.RECONSTRUCTOR + functionType: FunctionType = FunctionType.RECONSTRUCTOR + inputs_class: Type[TI] = ReconstructorInputs + outputs_class: Type[TO] = ReconstructorOutputs diff --git a/aixplain/modules/pipeline/designer/pipeline.py b/aixplain/modules/pipeline/designer/pipeline.py new file mode 100644 index 00000000..5304d202 --- /dev/null +++ b/aixplain/modules/pipeline/designer/pipeline.py @@ -0,0 +1,328 @@ +from typing import List, Type, Tuple, TypeVar + +from aixplain.enums import DataType + +from .base import Serializable, Node, Link +from .nodes import ( + AssetNode, + Decision, + Script, + Input, + Output, + Router, + Route, + BareReconstructor, + BareSegmentor, +) +from .enums import NodeType, RouteType, Operation + + +T = TypeVar("T", bound="AssetNode") + + +class DesignerPipeline(Serializable): + nodes: List[Node] = None + links: List[Link] = None + instance: any = None + + def __init__(self): + self.nodes = [] + self.links = [] + + def add_node(self, node: Node): + """ + Add a node to the current pipeline. + + This method will take care of setting the pipeline instance to the + node and setting the node number if it's not set. + + :param node: the node + :return: the node + """ + return node.attach_to(self) + + def add_nodes(self, *nodes: Node) -> List[Node]: + """ + Add multiple nodes to the current pipeline. + + :param nodes: the nodes + :return: the nodes + """ + return [self.add_node(node) for node in nodes] + + def add_link(self, link: Link) -> Link: + """ + Add a link to the current pipeline. + :param link: the link + :return: the link + """ + return link.attach_to(self) + + def serialize(self) -> dict: + """ + Serialize the pipeline to a dictionary. This method will serialize the + pipeline to a dictionary. + + :return: the pipeline as a dictionary + """ + return { + "nodes": [node.serialize() for node in self.nodes], + "links": [link.serialize() for link in self.links], + } + + def validate_nodes(self): + """ + Validate the linkage of the pipeline. 
This method will validate the
+        linkage of the pipeline by applying the following checks:
+        - All input nodes are linked out
+        - All output nodes are linked in
+        - All other nodes are linked in and out
+
+        :raises ValueError: if the pipeline is not valid
+        """
+        link_from_map = {link.from_node.number: link for link in self.links}
+        link_to_map = {link.to_node.number: link for link in self.links}
+        contains_input = False
+        contains_output = False
+        contains_asset = False
+        for node in self.nodes:
+            # validate every input node is linked out
+            if node.type == NodeType.INPUT:
+                contains_input = True
+                if node.number not in link_from_map:
+                    raise ValueError(f"Input node {node.label} not linked out")
+            # validate every output node is linked in
+            elif node.type == NodeType.OUTPUT:
+                contains_output = True
+                if node.number not in link_to_map:
+                    raise ValueError(f"Output node {node.label} not linked in")
+            # validate rest of the nodes are linked in and out
+            else:
+                if isinstance(node, AssetNode):
+                    contains_asset = True
+                if node.number not in link_from_map:
+                    raise ValueError(f"Node {node.label} not linked out")
+                if node.number not in link_to_map:
+                    raise ValueError(f"Node {node.label} not linked in")
+
+        if not contains_input or not contains_output or not contains_asset:
+            raise ValueError(
+                "Pipeline must contain at least one input, output and asset node"  # noqa
+            )
+
+    def is_param_linked(self, node, param):
+        """
+        Check if the param is linked to another node. This method will check
+        if the param is linked to another node.
+        :param node: the node
+        :param param: the param
+        :return: True if the param is linked, False otherwise
+        """
+        for link in self.links:
+            if (
+                link.to_node.number == node.number
+                and param.code == link.to_param
+            ):
+                return True
+
+        return False
+
+    def is_param_set(self, node, param):
+        """
+        Check if the param is set. This method will check if the param is set
+        or linked to another node.
+        :param node: the node
+        :param param: the param
+        :return: True if the param is set, False otherwise
+        """
+        return param.value or self.is_param_linked(node, param)
+
+    def validate_params(self):
+        """
+        This method will check if all required params are either set or linked
+
+        :raises ValueError: if the pipeline is not valid
+        """
+        for node in self.nodes:
+            for param in node.inputs:
+                if param.is_required and not self.is_param_set(node, param):
+                    raise ValueError(
+                        f"Param {param.code} of node {node.label} is required"
+                    )
+
+    def validate(self):
+        """
+        Validate the pipeline. This method will validate the pipeline by a
+        series of checks:
+        - Validate all nodes are linked correctly
+        - Validate all required params are set or linked
+
+        Any other validation checks can be added here.
+
+        :raises ValueError: if the pipeline is not valid
+        """
+        self.validate_nodes()
+        self.validate_params()
+
+    def get_link(self, from_node: int, to_node: int) -> Link:
+        """
+        Get the link between two nodes. This method will return the link
+        between two nodes, given their node numbers.
+
+        :param from_node: the from node number
+        :param to_node: the to node number
+        :return: the link
+        """
+        return next(
+            (
+                link
+                for link in self.links
+                if link.from_node.number == from_node
+                and link.to_node.number == to_node
+            ),
+            None,
+        )
+
+    def get_node(self, node_number: int) -> Node:
+        """
+        Get the node by its number. This method will return the node with the
+        given number.
+
+        :param node_number: the node number
+        :return: the node
+        """
+        return next(
+            (node for node in self.nodes if node.number == node_number), None
+        )
+
+    def auto_infer(self):
+        """
+        Automatically infer the data types of the nodes in the pipeline.
+        This method will traverse the links of the pipeline and propagate
+        the data types between the connected params and their input/output
+        nodes (see `Link.auto_infer`).
+        """
+        for link in self.links:
+            link.auto_infer()
+
+    def asset(
+        self, asset_id: str, *args, asset_class: Type[T] = AssetNode, **kwargs
+    ) -> T:
+        """
+        Shortcut to create an asset node for the current pipeline.
+        All params will be passed as keyword arguments to the node
+        constructor.
+
+        :param kwargs: keyword arguments
+        :return: the node
+        """
+        return asset_class(asset_id, *args, pipeline=self, **kwargs)
+
+    def decision(self, *args, **kwargs) -> Decision:
+        """
+        Shortcut to create a decision node for the current pipeline.
+        All params will be passed as keyword arguments to the node
+        constructor.
+
+        :param kwargs: keyword arguments
+        :return: the node
+        """
+        return Decision(*args, pipeline=self, **kwargs)
+
+    def script(self, *args, **kwargs) -> Script:
+        """
+        Shortcut to create a script node for the current pipeline.
+        All params will be passed as keyword arguments to the node
+        constructor.
+
+        :param kwargs: keyword arguments
+        :return: the node
+        """
+        return Script(*args, pipeline=self, **kwargs)
+
+    def input(self, *args, **kwargs) -> Input:
+        """
+        Shortcut to create an input node for the current pipeline.
+        All params will be passed as keyword arguments to the node
+        constructor.
+
+        :param kwargs: keyword arguments
+        :return: the node
+        """
+        return Input(*args, pipeline=self, **kwargs)
+
+    def output(self, *args, **kwargs) -> Output:
+        """
+        Shortcut to create an output node for the current pipeline.
+        All params will be passed as keyword arguments to the node
+        constructor.
+
+        :param kwargs: keyword arguments
+        :return: the node
+        """
+        return Output(*args, pipeline=self, **kwargs)
+
+    def router(
+        self, routes: List[Tuple[DataType, Node]], *args, **kwargs
+    ) -> Router:
+        """
+        Shortcut to create a router node for the current pipeline.
+        All params will be passed as keyword arguments to the node
+        constructor. The routes will be handled specially and will be
+        converted to Route instances in a convenient way.
+ + :param routes: the routes + :param kwargs: keyword arguments + :return: the node + """ + kwargs["routes"] = [ + Route( + value=route[0], + path=[route[1]], + type=RouteType.CHECK_TYPE, + operation=Operation.EQUAL, + ) + for route in routes + ] + return Router(*args, pipeline=self, **kwargs) + + def bare_reconstructor(self, *args, **kwargs) -> BareReconstructor: + """ + Shortcut to create an reconstructor node for the current pipeline. + All params will be passed as keyword arguments to the node + constructor. + + :param kwargs: keyword arguments + :return: the node + """ + return BareReconstructor(*args, pipeline=self, **kwargs) + + def bare_segmentor(self, *args, **kwargs) -> BareSegmentor: + """ + Shortcut to create an segmentor node for the current pipeline. + All params will be passed as keyword arguments to the node + constructor. + + :param kwargs: keyword arguments + :return: the node + """ + return BareSegmentor(*args, pipeline=self, **kwargs) diff --git a/aixplain/modules/pipeline/generate.py b/aixplain/modules/pipeline/generate.py new file mode 100644 index 00000000..c71e8ae6 --- /dev/null +++ b/aixplain/modules/pipeline/generate.py @@ -0,0 +1,227 @@ +import pathlib + +import requests +from urllib.parse import urljoin +from jinja2 import Environment, BaseLoader + +from aixplain.utils import config + +SEGMENTOR_FUNCTIONS = [ + "split-on-linebreak", + "speaker-diarization-audio", + "voice-activity-detection", +] + +RECONSTRUCTOR_FUNCTIONS = ["text-reconstruction", "audio-reconstruction"] + +MODULE_NAME = "pipeline" +TEMPLATE = """# This is an auto generated module. PLEASE DO NOT EDIT + + +from typing import Union, Type +from aixplain.enums import DataType + +from .designer import ( + InputParam, + OutputParam, + Inputs, + Outputs, + TI, + TO, + AssetNode, + BaseReconstructor, + BaseSegmentor, +) +from .default import DefaultPipeline +from aixplain.modules import asset + +{% for spec in specs %} + +class {{ spec.class_name }}Inputs(Inputs): +{% for input in spec.inputs %} + {{ input.name }}: InputParam = None +{% endfor %} + + def __init__(self, node=None): + super().__init__(node=node) +{% for input in spec.inputs %} + self.{{ input.name }} = self.create_param(code="{{ input.name }}", data_type=DataType.{{ input.data_type | upper }}, is_required={{ input.is_required }}) +{% endfor %} + + +class {{ spec.class_name }}Outputs(Outputs): +{% for output in spec.outputs %} + {{ output.name }}: OutputParam = None +{% endfor %} +{% if spec.is_segmentor %} + audio: OutputParam = None +{% endif %} + + def __init__(self, node=None): + super().__init__(node=node) +{% for output in spec.outputs %} + self.{{ output.name }} = self.create_param(code="{{ output.name }}", data_type=DataType.{{ output.data_type | upper }}) +{% endfor %} +{% if spec.is_segmentor %} + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO) +{% endif %} + + +class {{ spec.class_name }}({{spec.base_class}}[{{ spec.class_name }}Inputs, {{ spec.class_name }}Outputs]): + \"\"\" + {{ spec.description | wordwrap }} + + InputType: {{ spec.input_type }} + OutputType: {{ spec.output_type }} + \"\"\" + function: str = "{{ spec.id }}" + input_type: str = DataType.{{ spec.input_type | upper }} + output_type: str = DataType.{{ spec.output_type | upper }} + + inputs_class: Type[TI] = {{ spec.class_name }}Inputs + outputs_class: Type[TO] = {{ spec.class_name }}Outputs + +{% endfor %} + + +class Pipeline(DefaultPipeline): + +{% for spec in specs %} + def {{ spec.function_name }}(self, asset_id: Union[str, 
asset.Asset], *args, **kwargs) -> {{ spec.class_name }}: + \"\"\" + {{ spec.description | wordwrap }} + \"\"\" + return {{ spec.class_name }}(*args, asset_id=asset_id, pipeline=self, **kwargs) + +{% endfor %} +""" + + +def fetch_functions(): + """ + Fetch functions from the backend + """ + api_key = config.TEAM_API_KEY + aixplain_key = config.AIXPLAIN_API_KEY + backend_url = config.BACKEND_URL + + url = urljoin(backend_url, "sdk/functions") + headers = { + "Content-Type": "application/json", + } + + if aixplain_key: + headers["x-aixplain-key"] = aixplain_key + else: + headers["x-api-key"] = api_key + + r = requests.get(url, headers=headers) + try: + r.raise_for_status() + except requests.exceptions.HTTPError as e: + print("Functions could not be loaded, see error below") + raise e + + resp = r.json() + return resp["items"] + + +def populate_data_types(functions: list): + """ + Populate the data types + """ + data_types = set() + for function in functions: + for param in function["params"]: + data_types.add(param["dataType"]) + for output in function["output"]: + data_types.add(output["dataType"]) + return data_types + + +def populate_specs(functions: list): + """ + Populate the function class specs + """ + function_class_specs = [] + for function in functions: + # slugify function name by trimming some special chars and + # transforming it to snake case + function_name = ( + function["id"] + .replace("-", "_") + .replace("(", "_") + .replace(")", "_") + ) + base_class = "AssetNode" + is_segmentor = function["id"] in SEGMENTOR_FUNCTIONS + is_reconstructor = function["id"] in RECONSTRUCTOR_FUNCTIONS + if is_segmentor: + base_class = "BaseSegmentor" + elif is_reconstructor: + base_class = "BaseReconstructor" + + spec = { + "id": function["id"], + "is_segmentor": function["id"] in SEGMENTOR_FUNCTIONS, + "is_reconstructor": function["id"] in RECONSTRUCTOR_FUNCTIONS, + "function_name": function_name, + "base_class": base_class, + "class_name": "".join( + [w.title() for w in function_name.split("_")] + ), + "description": function["metaData"]["description"], + "input_type": function["metaData"]["InputType"], + "output_type": function["metaData"]["OutputType"], + "inputs": [ + { + "name": param["code"], + "data_type": param["dataType"], + "is_required": param["required"], + "is_list": param.get("multipleValues", False), + "default": param.get("defaultValues"), + "is_fixed": param.get("isFixed", False), + } + for param in function["params"] + ], + "outputs": [ + { + "name": output["code"], + "data_type": output["dataType"], + "default": output.get("defaultValue"), + } + for output in function["output"] + ], + } + + function_class_specs.append(spec) + + return function_class_specs + + +if __name__ == "__main__": + print("Fetching function specs") + + functions = fetch_functions() + data_types = populate_data_types(functions) + specs = populate_specs(functions) + + print( + f"Populating module with {len(data_types)} data types and {len(specs)} specs" + ) + env = Environment( + loader=BaseLoader(), + trim_blocks=True, + lstrip_blocks=True, + ) + template = env.from_string(TEMPLATE) + output = template.render(data_types=data_types, specs=specs) + + current_dir = pathlib.Path(__file__).parent + file_path = current_dir / f"{MODULE_NAME}.py" + + print(f"Writing module to file: {file_path}") + with open(file_path, "w") as f: + f.write(output) + + print("Module generated successfully") diff --git a/aixplain/modules/pipeline/pipeline.py b/aixplain/modules/pipeline/pipeline.py new file mode 100644 index 
00000000..36bc643d --- /dev/null +++ b/aixplain/modules/pipeline/pipeline.py @@ -0,0 +1,4548 @@ +# This is an auto generated module. PLEASE DO NOT EDIT + + +from typing import Union, Type +from aixplain.enums import DataType + +from .designer import ( + InputParam, + OutputParam, + Inputs, + Outputs, + TI, + TO, + AssetNode, + BaseReconstructor, + BaseSegmentor, +) +from .default import DefaultPipeline +from aixplain.modules import asset + + +class ObjectDetectionInputs(Inputs): + image: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) + + +class ObjectDetectionOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class ObjectDetection(AssetNode[ObjectDetectionInputs, ObjectDetectionOutputs]): + """ + Object Detection is a computer vision technology that identifies and locates +objects within an image, typically by drawing bounding boxes around the +detected objects and classifying them into predefined categories. + + InputType: video + OutputType: text + """ + function: str = "object-detection" + input_type: str = DataType.VIDEO + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = ObjectDetectionInputs + outputs_class: Type[TO] = ObjectDetectionOutputs + + +class LanguageIdentificationInputs(Inputs): + text: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + + +class LanguageIdentificationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class LanguageIdentification(AssetNode[LanguageIdentificationInputs, LanguageIdentificationOutputs]): + """ + Language Identification is the process of automatically determining the +language in which a given piece of text is written. + + InputType: text + OutputType: text + """ + function: str = "language-identification" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = LanguageIdentificationInputs + outputs_class: Type[TO] = LanguageIdentificationOutputs + + +class OcrInputs(Inputs): + image: InputParam = None + featuretypes: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) + self.featuretypes = self.create_param(code="featuretypes", data_type=DataType.TEXT, is_required=True) + + +class OcrOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class Ocr(AssetNode[OcrInputs, OcrOutputs]): + """ + OCR, or Optical Character Recognition, is a technology that converts different +types of documents, such as scanned paper documents, PDFs, or images captured +by a digital camera, into editable and searchable data by recognizing and +extracting text from the images. 
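+
+    A usage sketch, not part of the generated spec: it assumes the typed
+params declared above are exposed by the designer base classes as inputs and
+outputs attributes, and that the generated Pipeline can be built empty. The
+asset ID is a placeholder:
+
+        from aixplain.modules.pipeline.pipeline import Pipeline
+
+        pipeline = Pipeline()  # assumption: a factory may be needed instead
+        ocr = pipeline.ocr(asset_id="<ocr-model-id>")  # placeholder ID
+        ocr.inputs.image         # required IMAGE param
+        ocr.inputs.featuretypes  # required TEXT param
+        ocr.outputs.data         # TEXT output param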
+ + InputType: image + OutputType: text + """ + function: str = "ocr" + input_type: str = DataType.IMAGE + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = OcrInputs + outputs_class: Type[TO] = OcrOutputs + + +class ScriptExecutionInputs(Inputs): + text: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + + +class ScriptExecutionOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class ScriptExecution(AssetNode[ScriptExecutionInputs, ScriptExecutionOutputs]): + """ + Script Execution refers to the process of running a set of programmed +instructions or code within a computing environment, enabling the automated +performance of tasks, calculations, or operations as defined by the script. + + InputType: text + OutputType: text + """ + function: str = "script-execution" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = ScriptExecutionInputs + outputs_class: Type[TO] = ScriptExecutionOutputs + + +class ImageLabelDetectionInputs(Inputs): + image: InputParam = None + min_confidence: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) + self.min_confidence = self.create_param(code="min_confidence", data_type=DataType.TEXT, is_required=False) + + +class ImageLabelDetectionOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class ImageLabelDetection(AssetNode[ImageLabelDetectionInputs, ImageLabelDetectionOutputs]): + """ + Image Label Detection is a function that automatically identifies and assigns +descriptive tags or labels to objects, scenes, or elements within an image, +enabling easier categorization, search, and analysis of visual content. + + InputType: image + OutputType: label + """ + function: str = "image-label-detection" + input_type: str = DataType.IMAGE + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = ImageLabelDetectionInputs + outputs_class: Type[TO] = ImageLabelDetectionOutputs + + +class ImageCaptioningInputs(Inputs): + image: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) + + +class ImageCaptioningOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class ImageCaptioning(AssetNode[ImageCaptioningInputs, ImageCaptioningOutputs]): + """ + Image Captioning is a process that involves generating a textual description of +an image, typically using machine learning models to analyze the visual content +and produce coherent and contextually relevant sentences that describe the +objects, actions, and scenes depicted in the image. 
+
+    InputType: image
+    OutputType: text
+    """
+    function: str = "image-captioning"
+    input_type: str = DataType.IMAGE
+    output_type: str = DataType.TEXT
+
+    inputs_class: Type[TI] = ImageCaptioningInputs
+    outputs_class: Type[TO] = ImageCaptioningOutputs
+
+
+class AudioLanguageIdentificationInputs(Inputs):
+    audio: InputParam = None
+
+    def __init__(self, node=None):
+        super().__init__(node=node)
+        self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True)
+
+
+class AudioLanguageIdentificationOutputs(Outputs):
+    data: OutputParam = None
+
+    def __init__(self, node=None):
+        super().__init__(node=node)
+        self.data = self.create_param(code="data", data_type=DataType.LABEL)
+
+
+class AudioLanguageIdentification(AssetNode[AudioLanguageIdentificationInputs, AudioLanguageIdentificationOutputs]):
+    """
+    Audio Language Identification is a process that involves analyzing an audio
+recording to determine the language being spoken.
+
+    InputType: audio
+    OutputType: label
+    """
+    function: str = "audio-language-identification"
+    input_type: str = DataType.AUDIO
+    output_type: str = DataType.LABEL
+
+    inputs_class: Type[TI] = AudioLanguageIdentificationInputs
+    outputs_class: Type[TO] = AudioLanguageIdentificationOutputs
+
+
+class AsrAgeClassificationInputs(Inputs):
+    source_audio: InputParam = None
+
+    def __init__(self, node=None):
+        super().__init__(node=node)
+        self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True)
+
+
+class AsrAgeClassificationOutputs(Outputs):
+    data: OutputParam = None
+
+    def __init__(self, node=None):
+        super().__init__(node=node)
+        self.data = self.create_param(code="data", data_type=DataType.LABEL)
+
+
+class AsrAgeClassification(AssetNode[AsrAgeClassificationInputs, AsrAgeClassificationOutputs]):
+    """
+    The ASR Age Classification function is designed to analyze audio recordings of
+speech to determine the speaker's age group by leveraging automatic speech
+recognition (ASR) technology and machine learning algorithms.
+
+    InputType: audio
+    OutputType: label
+    """
+    function: str = "asr-age-classification"
+    input_type: str = DataType.AUDIO
+    output_type: str = DataType.LABEL
+
+    inputs_class: Type[TI] = AsrAgeClassificationInputs
+    outputs_class: Type[TO] = AsrAgeClassificationOutputs
+
+
+class BenchmarkScoringMtInputs(Inputs):
+    input: InputParam = None
+    text: InputParam = None
+
+    def __init__(self, node=None):
+        super().__init__(node=node)
+        self.input = self.create_param(code="input", data_type=DataType.TEXT, is_required=True)
+        # the upstream spec lists "text" twice with the same code; a single
+        # param is kept here, since a duplicate would only overwrite it
+        self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True)
+
+
+class BenchmarkScoringMtOutputs(Outputs):
+    data: OutputParam = None
+
+    def __init__(self, node=None):
+        super().__init__(node=node)
+        self.data = self.create_param(code="data", data_type=DataType.LABEL)
+
+
+class BenchmarkScoringMt(AssetNode[BenchmarkScoringMtInputs, BenchmarkScoringMtOutputs]):
+    """
+    Benchmark Scoring MT is a function designed to evaluate and score machine
+translation systems by comparing their output against a set of predefined
+benchmarks, thereby assessing their accuracy and performance.
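+
+    A hypothetical sketch of wiring this node; the asset ID is a placeholder
+and the inputs/outputs attributes are assumed to come from the designer base
+classes:
+
+        from aixplain.modules.pipeline.pipeline import Pipeline
+
+        pipeline = Pipeline()  # assumption: a factory may be needed instead
+        scorer = pipeline.benchmark_scoring_mt(asset_id="<mt-metric-id>")
+        scorer.inputs.input   # required TEXT param
+        scorer.inputs.text    # required TEXT param
+        scorer.outputs.data   # LABEL output holding the score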
+ + InputType: text + OutputType: label + """ + function: str = "benchmark-scoring-mt" + input_type: str = DataType.TEXT + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = BenchmarkScoringMtInputs + outputs_class: Type[TO] = BenchmarkScoringMtOutputs + + +class AsrGenderClassificationInputs(Inputs): + source_audio: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) + + +class AsrGenderClassificationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class AsrGenderClassification(AssetNode[AsrGenderClassificationInputs, AsrGenderClassificationOutputs]): + """ + The ASR Gender Classification function analyzes audio recordings to determine +and classify the speaker's gender based on their voice characteristics. + + InputType: audio + OutputType: label + """ + function: str = "asr-gender-classification" + input_type: str = DataType.AUDIO + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = AsrGenderClassificationInputs + outputs_class: Type[TO] = AsrGenderClassificationOutputs + + +class BaseModelInputs(Inputs): + language: InputParam = None + text: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + + +class BaseModelOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class BaseModel(AssetNode[BaseModelInputs, BaseModelOutputs]): + """ + The Base-Model function serves as a foundational framework designed to provide +essential features and capabilities upon which more specialized or advanced +models can be built and customized. + + InputType: text + OutputType: text + """ + function: str = "base-model" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = BaseModelInputs + outputs_class: Type[TO] = BaseModelOutputs + + +class LanguageIdentificationAudioInputs(Inputs): + audio: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) + + +class LanguageIdentificationAudioOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class LanguageIdentificationAudio(AssetNode[LanguageIdentificationAudioInputs, LanguageIdentificationAudioOutputs]): + """ + The Language Identification Audio function analyzes audio input to determine +and identify the language being spoken. 
+ + InputType: audio + OutputType: label + """ + function: str = "language-identification-audio" + input_type: str = DataType.AUDIO + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = LanguageIdentificationAudioInputs + outputs_class: Type[TO] = LanguageIdentificationAudioOutputs + + +class LoglikelihoodInputs(Inputs): + text: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + + +class LoglikelihoodOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.NUMBER) + + +class Loglikelihood(AssetNode[LoglikelihoodInputs, LoglikelihoodOutputs]): + """ + The Log Likelihood function measures the probability of observing the given +data under a specific statistical model by taking the natural logarithm of the +likelihood function, thereby transforming the product of probabilities into a +sum, which simplifies the process of optimization and parameter estimation. + + InputType: text + OutputType: number + """ + function: str = "loglikelihood" + input_type: str = DataType.TEXT + output_type: str = DataType.NUMBER + + inputs_class: Type[TI] = LoglikelihoodInputs + outputs_class: Type[TO] = LoglikelihoodOutputs + + +class VideoEmbeddingInputs(Inputs): + language: InputParam = None + video: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=False) + + +class VideoEmbeddingOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.EMBEDDING) + + +class VideoEmbedding(AssetNode[VideoEmbeddingInputs, VideoEmbeddingOutputs]): + """ + Video Embedding is a process that transforms video content into a fixed- +dimensional vector representation, capturing essential features and patterns to +facilitate tasks such as retrieval, classification, and recommendation. + + InputType: video + OutputType: embedding + """ + function: str = "video-embedding" + input_type: str = DataType.VIDEO + output_type: str = DataType.EMBEDDING + + inputs_class: Type[TI] = VideoEmbeddingInputs + outputs_class: Type[TO] = VideoEmbeddingOutputs + + +class TextSegmenationInputs(Inputs): + text: InputParam = None + language: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + + +class TextSegmenationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class TextSegmenation(AssetNode[TextSegmenationInputs, TextSegmenationOutputs]): + """ + Text Segmentation is the process of dividing a continuous text into meaningful +units, such as words, sentences, or topics, to facilitate easier analysis and +understanding. 
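+
+    A hypothetical sketch; note that the method name keeps the "segmenation"
+spelling of the upstream function id. The asset ID is a placeholder and the
+inputs/outputs attributes are assumed from the designer base classes:
+
+        from aixplain.modules.pipeline.pipeline import Pipeline
+
+        pipeline = Pipeline()  # assumption: a factory may be needed instead
+        seg = pipeline.text_segmenation(asset_id="<segmentation-model-id>")
+        seg.inputs.text       # required TEXT param
+        seg.inputs.language   # required LABEL param
+        seg.outputs.data      # TEXT output param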
+ + InputType: text + OutputType: text + """ + function: str = "text-segmenation" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = TextSegmenationInputs + outputs_class: Type[TO] = TextSegmenationOutputs + + +class ImageEmbeddingInputs(Inputs): + language: InputParam = None + image: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + + +class ImageEmbeddingOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class ImageEmbedding(AssetNode[ImageEmbeddingInputs, ImageEmbeddingOutputs]): + """ + Image Embedding is a process that transforms an image into a fixed-dimensional +vector representation, capturing its essential features and enabling efficient +comparison, retrieval, and analysis in various machine learning and computer +vision tasks. + + InputType: image + OutputType: text + """ + function: str = "image-embedding" + input_type: str = DataType.IMAGE + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = ImageEmbeddingInputs + outputs_class: Type[TO] = ImageEmbeddingOutputs + + +class ImageManipulationInputs(Inputs): + image: InputParam = None + targetimage: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) + self.targetimage = self.create_param(code="targetimage", data_type=DataType.IMAGE, is_required=True) + + +class ImageManipulationOutputs(Outputs): + image: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.image = self.create_param(code="image", data_type=DataType.IMAGE) + + +class ImageManipulation(AssetNode[ImageManipulationInputs, ImageManipulationOutputs]): + """ + Image Manipulation refers to the process of altering or enhancing digital +images using various techniques and tools to achieve desired visual effects, +correct imperfections, or transform the image's appearance. + + InputType: image + OutputType: image + """ + function: str = "image-manipulation" + input_type: str = DataType.IMAGE + output_type: str = DataType.IMAGE + + inputs_class: Type[TI] = ImageManipulationInputs + outputs_class: Type[TO] = ImageManipulationOutputs + + +class ImageToVideoGenerationInputs(Inputs): + language: InputParam = None + image: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + + +class ImageToVideoGenerationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.VIDEO) + + +class ImageToVideoGeneration(AssetNode[ImageToVideoGenerationInputs, ImageToVideoGenerationOutputs]): + """ + The Image To Video Generation function transforms a series of static images +into a cohesive, dynamic video sequence, often incorporating transitions, +effects, and synchronization with audio to create a visually engaging +narrative. 
+
+    InputType: image
+    OutputType: video
+    """
+    function: str = "image-to-video-generation"
+    input_type: str = DataType.IMAGE
+    output_type: str = DataType.VIDEO
+
+    inputs_class: Type[TI] = ImageToVideoGenerationInputs
+    outputs_class: Type[TO] = ImageToVideoGenerationOutputs
+
+
+class AudioForcedAlignmentInputs(Inputs):
+    audio: InputParam = None
+    text: InputParam = None
+    language: InputParam = None
+    dialect: InputParam = None
+    script: InputParam = None
+
+    def __init__(self, node=None):
+        super().__init__(node=node)
+        self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True)
+        self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True)
+        self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True)
+        self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False)
+        self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False)
+
+
+class AudioForcedAlignmentOutputs(Outputs):
+    text: OutputParam = None
+    audio: OutputParam = None
+
+    def __init__(self, node=None):
+        super().__init__(node=node)
+        self.text = self.create_param(code="text", data_type=DataType.TEXT)
+        self.audio = self.create_param(code="audio", data_type=DataType.AUDIO)
+
+
+class AudioForcedAlignment(AssetNode[AudioForcedAlignmentInputs, AudioForcedAlignmentOutputs]):
+    """
+    Audio Forced Alignment is a process that synchronizes a given audio recording
+with its corresponding transcript by precisely aligning each spoken word or
+phoneme to its exact timing within the audio.
+
+    InputType: audio
+    OutputType: audio
+    """
+    function: str = "audio-forced-alignment"
+    input_type: str = DataType.AUDIO
+    output_type: str = DataType.AUDIO
+
+    inputs_class: Type[TI] = AudioForcedAlignmentInputs
+    outputs_class: Type[TO] = AudioForcedAlignmentOutputs
+
+
+class BenchmarkScoringAsrInputs(Inputs):
+    input: InputParam = None
+    text: InputParam = None
+
+    def __init__(self, node=None):
+        super().__init__(node=node)
+        self.input = self.create_param(code="input", data_type=DataType.AUDIO, is_required=True)
+        # the upstream spec lists "text" twice with the same code; a single
+        # param is kept here, since a duplicate would only overwrite it
+        self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True)
+
+
+class BenchmarkScoringAsrOutputs(Outputs):
+    data: OutputParam = None
+
+    def __init__(self, node=None):
+        super().__init__(node=node)
+        self.data = self.create_param(code="data", data_type=DataType.LABEL)
+
+
+class BenchmarkScoringAsr(AssetNode[BenchmarkScoringAsrInputs, BenchmarkScoringAsrOutputs]):
+    """
+    Benchmark Scoring ASR is a function that evaluates and compares the performance
+of automatic speech recognition systems by analyzing their accuracy, speed, and
+other relevant metrics against a standardized set of benchmarks.
+ + InputType: audio + OutputType: label + """ + function: str = "benchmark-scoring-asr" + input_type: str = DataType.AUDIO + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = BenchmarkScoringAsrInputs + outputs_class: Type[TO] = BenchmarkScoringAsrOutputs + + +class VisualQuestionAnsweringInputs(Inputs): + text: InputParam = None + language: InputParam = None + image: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + + +class VisualQuestionAnsweringOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class VisualQuestionAnswering(AssetNode[VisualQuestionAnsweringInputs, VisualQuestionAnsweringOutputs]): + """ + Visual Question Answering (VQA) is a task in artificial intelligence that +involves analyzing an image and providing accurate, contextually relevant +answers to questions posed about the visual content of that image. + + InputType: image + OutputType: video + """ + function: str = "visual-question-answering" + input_type: str = DataType.IMAGE + output_type: str = DataType.VIDEO + + inputs_class: Type[TI] = VisualQuestionAnsweringInputs + outputs_class: Type[TO] = VisualQuestionAnsweringOutputs + + +class DocumentImageParsingInputs(Inputs): + image: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + + +class DocumentImageParsingOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class DocumentImageParsing(AssetNode[DocumentImageParsingInputs, DocumentImageParsingOutputs]): + """ + Document Image Parsing is the process of analyzing and converting scanned or +photographed images of documents into structured, machine-readable formats by +identifying and extracting text, layout, and other relevant information. + + InputType: image + OutputType: text + """ + function: str = "document-image-parsing" + input_type: str = DataType.IMAGE + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = DocumentImageParsingInputs + outputs_class: Type[TO] = DocumentImageParsingOutputs + + +class DocumentInformationExtractionInputs(Inputs): + image: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + + +class DocumentInformationExtractionOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class DocumentInformationExtraction(AssetNode[DocumentInformationExtractionInputs, DocumentInformationExtractionOutputs]): + """ + Document Information Extraction is the process of automatically identifying, +extracting, and structuring relevant data from unstructured or semi-structured +documents, such as invoices, receipts, contracts, and forms, to facilitate +easier data management and analysis. 
+ + InputType: image + OutputType: text + """ + function: str = "document-information-extraction" + input_type: str = DataType.IMAGE + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = DocumentInformationExtractionInputs + outputs_class: Type[TO] = DocumentInformationExtractionOutputs + + +class DepthEstimationInputs(Inputs): + language: InputParam = None + image: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + + +class DepthEstimationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class DepthEstimation(AssetNode[DepthEstimationInputs, DepthEstimationOutputs]): + """ + Depth estimation is a computational process that determines the distance of +objects from a viewpoint, typically using visual data from cameras or sensors +to create a three-dimensional understanding of a scene. + + InputType: image + OutputType: text + """ + function: str = "depth-estimation" + input_type: str = DataType.IMAGE + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = DepthEstimationInputs + outputs_class: Type[TO] = DepthEstimationOutputs + + +class VideoGenerationInputs(Inputs): + text: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + + +class VideoGenerationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.VIDEO) + + +class VideoGeneration(AssetNode[VideoGenerationInputs, VideoGenerationOutputs]): + """ + Video Generation is the process of creating video content through automated or +semi-automated means, often utilizing algorithms, artificial intelligence, or +software tools to produce visual and audio elements that can range from simple +animations to complex, realistic scenes. 
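+
+    A minimal hypothetical sketch (placeholder asset ID; inputs/outputs
+attributes assumed from the designer base classes):
+
+        from aixplain.modules.pipeline.pipeline import Pipeline
+
+        pipeline = Pipeline()  # assumption: a factory may be needed instead
+        t2v = pipeline.video_generation(asset_id="<t2v-model-id>")
+        t2v.inputs.text    # required TEXT prompt param
+        t2v.outputs.data   # VIDEO output param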
+ + InputType: text + OutputType: video + """ + function: str = "video-generation" + input_type: str = DataType.TEXT + output_type: str = DataType.VIDEO + + inputs_class: Type[TI] = VideoGenerationInputs + outputs_class: Type[TO] = VideoGenerationOutputs + + +class ReferencelessAudioGenerationMetricInputs(Inputs): + hypotheses: InputParam = None + sources: InputParam = None + score_identifier: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.AUDIO, is_required=True) + self.sources = self.create_param(code="sources", data_type=DataType.AUDIO, is_required=False) + self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True) + + +class ReferencelessAudioGenerationMetricOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class ReferencelessAudioGenerationMetric(AssetNode[ReferencelessAudioGenerationMetricInputs, ReferencelessAudioGenerationMetricOutputs]): + """ + The Referenceless Audio Generation Metric is a tool designed to evaluate the +quality of generated audio content without the need for a reference or original +audio sample for comparison. + + InputType: text + OutputType: text + """ + function: str = "referenceless-audio-generation-metric" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = ReferencelessAudioGenerationMetricInputs + outputs_class: Type[TO] = ReferencelessAudioGenerationMetricOutputs + + +class MultiClassImageClassificationInputs(Inputs): + image: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + + +class MultiClassImageClassificationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class MultiClassImageClassification(AssetNode[MultiClassImageClassificationInputs, MultiClassImageClassificationOutputs]): + """ + Multi Class Image Classification is a machine learning task where an algorithm +is trained to categorize images into one of several predefined classes or +categories based on their visual content. + + InputType: image + OutputType: label + """ + function: str = "multi-class-image-classification" + input_type: str = DataType.IMAGE + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = MultiClassImageClassificationInputs + outputs_class: Type[TO] = MultiClassImageClassificationOutputs + + +class SemanticSegmentationInputs(Inputs): + image: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + + +class SemanticSegmentationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class SemanticSegmentation(AssetNode[SemanticSegmentationInputs, SemanticSegmentationOutputs]): + """ + Semantic segmentation is a computer vision process that involves classifying +each pixel in an image into a predefined category, effectively partitioning the +image into meaningful segments based on the objects or regions they represent. 
+ + InputType: image + OutputType: label + """ + function: str = "semantic-segmentation" + input_type: str = DataType.IMAGE + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = SemanticSegmentationInputs + outputs_class: Type[TO] = SemanticSegmentationOutputs + + +class InstanceSegmentationInputs(Inputs): + image: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + + +class InstanceSegmentationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class InstanceSegmentation(AssetNode[InstanceSegmentationInputs, InstanceSegmentationOutputs]): + """ + Instance segmentation is a computer vision task that involves detecting and +delineating each distinct object within an image, assigning a unique label and +precise boundary to every individual instance of objects, even if they belong +to the same category. + + InputType: image + OutputType: label + """ + function: str = "instance-segmentation" + input_type: str = DataType.IMAGE + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = InstanceSegmentationInputs + outputs_class: Type[TO] = InstanceSegmentationOutputs + + +class ImageColorizationInputs(Inputs): + image: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + + +class ImageColorizationOutputs(Outputs): + image: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.image = self.create_param(code="image", data_type=DataType.IMAGE) + + +class ImageColorization(AssetNode[ImageColorizationInputs, ImageColorizationOutputs]): + """ + Image colorization is a process that involves adding color to grayscale images, +transforming them from black-and-white to full-color representations, often +using advanced algorithms and machine learning techniques to predict and apply +the appropriate hues and shades. 
+ + InputType: image + OutputType: image + """ + function: str = "image-colorization" + input_type: str = DataType.IMAGE + output_type: str = DataType.IMAGE + + inputs_class: Type[TI] = ImageColorizationInputs + outputs_class: Type[TO] = ImageColorizationOutputs + + +class AudioGenerationMetricInputs(Inputs): + hypotheses: InputParam = None + references: InputParam = None + sources: InputParam = None + score_identifier: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.AUDIO, is_required=True) + self.references = self.create_param(code="references", data_type=DataType.AUDIO, is_required=False) + self.sources = self.create_param(code="sources", data_type=DataType.TEXT, is_required=False) + self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True) + + +class AudioGenerationMetricOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class AudioGenerationMetric(AssetNode[AudioGenerationMetricInputs, AudioGenerationMetricOutputs]): + """ + The Audio Generation Metric is a quantitative measure used to evaluate the +quality, accuracy, and overall performance of audio generated by artificial +intelligence systems, often considering factors such as fidelity, +intelligibility, and similarity to human-produced audio. + + InputType: text + OutputType: text + """ + function: str = "audio-generation-metric" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = AudioGenerationMetricInputs + outputs_class: Type[TO] = AudioGenerationMetricOutputs + + +class ImageImpaintingInputs(Inputs): + image: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + + +class ImageImpaintingOutputs(Outputs): + image: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.image = self.create_param(code="image", data_type=DataType.IMAGE) + + +class ImageImpainting(AssetNode[ImageImpaintingInputs, ImageImpaintingOutputs]): + """ + Image inpainting is a process that involves filling in missing or damaged parts +of an image in a way that is visually coherent and seamlessly blends with the +surrounding areas, often using advanced algorithms and techniques to restore +the image to its original or intended appearance. 
+ + InputType: image + OutputType: image + """ + function: str = "image-impainting" + input_type: str = DataType.IMAGE + output_type: str = DataType.IMAGE + + inputs_class: Type[TI] = ImageImpaintingInputs + outputs_class: Type[TO] = ImageImpaintingOutputs + + +class StyleTransferInputs(Inputs): + image: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + + +class StyleTransferOutputs(Outputs): + image: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.image = self.create_param(code="image", data_type=DataType.IMAGE) + + +class StyleTransfer(AssetNode[StyleTransferInputs, StyleTransferOutputs]): + """ + Style Transfer is a technique in artificial intelligence that applies the +visual style of one image (such as the brushstrokes of a famous painting) to +the content of another image, effectively blending the artistic elements of the +first image with the subject matter of the second. + + InputType: image + OutputType: image + """ + function: str = "style-transfer" + input_type: str = DataType.IMAGE + output_type: str = DataType.IMAGE + + inputs_class: Type[TI] = StyleTransferInputs + outputs_class: Type[TO] = StyleTransferOutputs + + +class MultiClassTextClassificationInputs(Inputs): + language: InputParam = None + text: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=False) + + +class MultiClassTextClassificationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class MultiClassTextClassification(AssetNode[MultiClassTextClassificationInputs, MultiClassTextClassificationOutputs]): + """ + Multi Class Text Classification is a natural language processing task that +involves categorizing a given text into one of several predefined classes or +categories based on its content. 
+ + InputType: text + OutputType: label + """ + function: str = "multi-class-text-classification" + input_type: str = DataType.TEXT + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = MultiClassTextClassificationInputs + outputs_class: Type[TO] = MultiClassTextClassificationOutputs + + +class TextEmbeddingInputs(Inputs): + text: InputParam = None + language: InputParam = None + dialect: InputParam = None + script: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + + +class TextEmbeddingOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class TextEmbedding(AssetNode[TextEmbeddingInputs, TextEmbeddingOutputs]): + """ + Text embedding is a process that converts text into numerical vectors, +capturing the semantic meaning and contextual relationships of words or +phrases, enabling machines to understand and analyze natural language more +effectively. + + InputType: text + OutputType: text + """ + function: str = "text-embedding" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = TextEmbeddingInputs + outputs_class: Type[TO] = TextEmbeddingOutputs + + +class MultiLabelTextClassificationInputs(Inputs): + language: InputParam = None + text: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=False) + + +class MultiLabelTextClassificationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class MultiLabelTextClassification(AssetNode[MultiLabelTextClassificationInputs, MultiLabelTextClassificationOutputs]): + """ + Multi Label Text Classification is a natural language processing task where a +given text is analyzed and assigned multiple relevant labels or categories from +a predefined set, allowing for the text to belong to more than one category +simultaneously. 
+ + InputType: text + OutputType: label + """ + function: str = "multi-label-text-classification" + input_type: str = DataType.TEXT + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = MultiLabelTextClassificationInputs + outputs_class: Type[TO] = MultiLabelTextClassificationOutputs + + +class TextReconstructionInputs(Inputs): + text: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + + +class TextReconstructionOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class TextReconstruction(BaseReconstructor[TextReconstructionInputs, TextReconstructionOutputs]): + """ + Text Reconstruction is a process that involves piecing together fragmented or +incomplete text data to restore it to its original, coherent form. + + InputType: text + OutputType: text + """ + function: str = "text-reconstruction" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = TextReconstructionInputs + outputs_class: Type[TO] = TextReconstructionOutputs + + +class FactCheckingInputs(Inputs): + language: InputParam = None + text: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=False) + + +class FactCheckingOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class FactChecking(AssetNode[FactCheckingInputs, FactCheckingOutputs]): + """ + Fact Checking is the process of verifying the accuracy and truthfulness of +information, statements, or claims by cross-referencing with reliable sources +and evidence. + + InputType: text + OutputType: label + """ + function: str = "fact-checking" + input_type: str = DataType.TEXT + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = FactCheckingInputs + outputs_class: Type[TO] = FactCheckingOutputs + + +class SpeechClassificationInputs(Inputs): + audio: InputParam = None + language: InputParam = None + script: InputParam = None + dialect: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + + +class SpeechClassificationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class SpeechClassification(AssetNode[SpeechClassificationInputs, SpeechClassificationOutputs]): + """ + Speech Classification is a process that involves analyzing and categorizing +spoken language into predefined categories or classes based on various features +such as tone, pitch, and linguistic content. 
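+
+    A hypothetical sketch showing the required versus optional params
+(placeholder asset ID; attribute access assumed from the designer base
+classes):
+
+        from aixplain.modules.pipeline.pipeline import Pipeline
+
+        pipeline = Pipeline()  # assumption: a factory may be needed instead
+        clf = pipeline.speech_classification(asset_id="<classifier-id>")
+        clf.inputs.audio      # required AUDIO param
+        clf.inputs.language   # required LABEL param
+        clf.inputs.dialect    # optional LABEL param
+        clf.inputs.script     # optional LABEL param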
+ + InputType: audio + OutputType: label + """ + function: str = "speech-classification" + input_type: str = DataType.AUDIO + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = SpeechClassificationInputs + outputs_class: Type[TO] = SpeechClassificationOutputs + + +class IntentClassificationInputs(Inputs): + language: InputParam = None + text: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=False) + + +class IntentClassificationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class IntentClassification(AssetNode[IntentClassificationInputs, IntentClassificationOutputs]): + """ + Intent Classification is a natural language processing task that involves +analyzing and categorizing user text input to determine the underlying purpose +or goal behind the communication, such as booking a flight, asking for weather +information, or setting a reminder. + + InputType: text + OutputType: label + """ + function: str = "intent-classification" + input_type: str = DataType.TEXT + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = IntentClassificationInputs + outputs_class: Type[TO] = IntentClassificationOutputs + + +class PartOfSpeechTaggingInputs(Inputs): + language: InputParam = None + text: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=False) + + +class PartOfSpeechTaggingOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class PartOfSpeechTagging(AssetNode[PartOfSpeechTaggingInputs, PartOfSpeechTaggingOutputs]): + """ + Part of Speech Tagging is a natural language processing task that involves +assigning each word in a sentence its corresponding part of speech, such as +noun, verb, adjective, or adverb, based on its role and context within the +sentence. + + InputType: text + OutputType: label + """ + function: str = "part-of-speech-tagging" + input_type: str = DataType.TEXT + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = PartOfSpeechTaggingInputs + outputs_class: Type[TO] = PartOfSpeechTaggingOutputs + + +class MetricAggregationInputs(Inputs): + text: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + + +class MetricAggregationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class MetricAggregation(AssetNode[MetricAggregationInputs, MetricAggregationOutputs]): + """ + Metric Aggregation is a function that computes and summarizes numerical data by +applying statistical operations, such as averaging, summing, or finding the +minimum and maximum values, to provide insights and facilitate analysis of +large datasets. 
+
+    InputType: text
+    OutputType: text
+    """
+    function: str = "metric-aggregation"
+    input_type: str = DataType.TEXT
+    output_type: str = DataType.TEXT
+
+    inputs_class: Type[TI] = MetricAggregationInputs
+    outputs_class: Type[TO] = MetricAggregationOutputs
+
+
+class DialectDetectionInputs(Inputs):
+    audio: InputParam = None
+    language: InputParam = None
+
+    def __init__(self, node=None):
+        super().__init__(node=node)
+        self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True)
+        self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False)
+
+
+class DialectDetectionOutputs(Outputs):
+    data: OutputParam = None
+
+    def __init__(self, node=None):
+        super().__init__(node=node)
+        self.data = self.create_param(code="data", data_type=DataType.TEXT)
+
+
+class DialectDetection(AssetNode[DialectDetectionInputs, DialectDetectionOutputs]):
+    """
+    Dialect Detection is a function that identifies and classifies the specific
+regional or social variations of a language spoken or written by an individual,
+enabling the recognition of distinct linguistic patterns and nuances associated
+with different dialects.
+
+    InputType: audio
+    OutputType: text
+    """
+    function: str = "dialect-detection"
+    input_type: str = DataType.AUDIO
+    output_type: str = DataType.TEXT
+
+    inputs_class: Type[TI] = DialectDetectionInputs
+    outputs_class: Type[TO] = DialectDetectionOutputs
+
+
+class InverseTextNormalizationInputs(Inputs):
+    text: InputParam = None
+
+    def __init__(self, node=None):
+        super().__init__(node=node)
+        self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=False)
+
+
+class InverseTextNormalizationOutputs(Outputs):
+    data: OutputParam = None
+
+    def __init__(self, node=None):
+        super().__init__(node=node)
+        self.data = self.create_param(code="data", data_type=DataType.LABEL)
+
+
+class InverseTextNormalization(AssetNode[InverseTextNormalizationInputs, InverseTextNormalizationOutputs]):
+    """
+    Inverse Text Normalization is the process of converting spoken-form text, such
+as the raw output of speech recognition, back into its conventional written
+form, so that entities such as numbers, dates, and abbreviations appear as
+"$23" or "Dr." rather than "twenty three dollars" or "doctor".
+
+    InputType: text
+    OutputType: label
+    """
+    function: str = "inverse-text-normalization"
+    input_type: str = DataType.TEXT
+    output_type: str = DataType.LABEL
+
+    inputs_class: Type[TI] = InverseTextNormalizationInputs
+    outputs_class: Type[TO] = InverseTextNormalizationOutputs
+
+
+class TextToAudioInputs(Inputs):
+    text: InputParam = None
+    language: InputParam = None
+
+    def __init__(self, node=None):
+        super().__init__(node=node)
+        self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True)
+        self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False)
+
+
+class TextToAudioOutputs(Outputs):
+    data: OutputParam = None
+
+    def __init__(self, node=None):
+        super().__init__(node=node)
+        self.data = self.create_param(code="data", data_type=DataType.AUDIO)
+
+
+class TextToAudio(AssetNode[TextToAudioInputs, TextToAudioOutputs]):
+    """
+    The Text to Audio function converts written text into spoken words, allowing
+users to listen to the content instead of reading it.
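+
+    A hypothetical sketch (placeholder asset ID; attribute access assumed from
+the designer base classes):
+
+        from aixplain.modules.pipeline.pipeline import Pipeline
+
+        pipeline = Pipeline()  # assumption: a factory may be needed instead
+        tts = pipeline.text_to_audio(asset_id="<tts-model-id>")
+        tts.inputs.text       # required TEXT param
+        tts.inputs.language   # optional LABEL param
+        tts.outputs.data      # AUDIO output param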
+ + InputType: text + OutputType: audio + """ + function: str = "text-to-audio" + input_type: str = DataType.TEXT + output_type: str = DataType.AUDIO + + inputs_class: Type[TI] = TextToAudioInputs + outputs_class: Type[TO] = TextToAudioOutputs + + +class FillTextMaskInputs(Inputs): + text: InputParam = None + language: InputParam = None + dialect: InputParam = None + script: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + + +class FillTextMaskOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class FillTextMask(AssetNode[FillTextMaskInputs, FillTextMaskOutputs]): + """ + The "Fill Text Mask" function takes a text input with masked or placeholder +characters and replaces those placeholders with specified or contextually +appropriate characters to generate a complete and coherent text output. + + InputType: text + OutputType: text + """ + function: str = "fill-text-mask" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = FillTextMaskInputs + outputs_class: Type[TO] = FillTextMaskOutputs + + +class VideoContentModerationInputs(Inputs): + video: InputParam = None + min_confidence: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=True) + self.min_confidence = self.create_param(code="min_confidence", data_type=DataType.TEXT, is_required=False) + + +class VideoContentModerationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class VideoContentModeration(AssetNode[VideoContentModerationInputs, VideoContentModerationOutputs]): + """ + Video Content Moderation is the process of reviewing, analyzing, and filtering +video content to ensure it adheres to community guidelines, legal standards, +and platform policies, thereby preventing the dissemination of inappropriate, +harmful, or illegal material. 
+ + InputType: video + OutputType: label + """ + function: str = "video-content-moderation" + input_type: str = DataType.VIDEO + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = VideoContentModerationInputs + outputs_class: Type[TO] = VideoContentModerationOutputs + + +class ExtractAudioFromVideoInputs(Inputs): + video: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=True) + + +class ExtractAudioFromVideoOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.AUDIO) + + +class ExtractAudioFromVideo(AssetNode[ExtractAudioFromVideoInputs, ExtractAudioFromVideoOutputs]): + """ + The "Extract Audio From Video" function allows users to separate and save the +audio track from a video file, enabling them to obtain just the sound without +the accompanying visual content. + + InputType: video + OutputType: audio + """ + function: str = "extract-audio-from-video" + input_type: str = DataType.VIDEO + output_type: str = DataType.AUDIO + + inputs_class: Type[TI] = ExtractAudioFromVideoInputs + outputs_class: Type[TO] = ExtractAudioFromVideoOutputs + + +class ImageCompressionInputs(Inputs): + image: InputParam = None + apl_qfactor: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) + self.apl_qfactor = self.create_param(code="apl_qfactor", data_type=DataType.TEXT, is_required=False) + + +class ImageCompressionOutputs(Outputs): + image: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.image = self.create_param(code="image", data_type=DataType.IMAGE) + + +class ImageCompression(AssetNode[ImageCompressionInputs, ImageCompressionOutputs]): + """ + Image compression is a process that reduces the file size of an image by +removing redundant or non-essential data, while maintaining an acceptable level +of visual quality. + + InputType: image + OutputType: image + """ + function: str = "image-compression" + input_type: str = DataType.IMAGE + output_type: str = DataType.IMAGE + + inputs_class: Type[TI] = ImageCompressionInputs + outputs_class: Type[TO] = ImageCompressionOutputs + + +class MultilingualSpeechRecognitionInputs(Inputs): + source_audio: InputParam = None + language: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) + + +class MultilingualSpeechRecognitionOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class MultilingualSpeechRecognition(AssetNode[MultilingualSpeechRecognitionInputs, MultilingualSpeechRecognitionOutputs]): + """ + Multilingual Speech Recognition is a technology that enables the automatic +transcription of spoken language into text across multiple languages, allowing +for seamless communication and understanding in diverse linguistic contexts. 
+ + InputType: audio + OutputType: text + """ + function: str = "multilingual-speech-recognition" + input_type: str = DataType.AUDIO + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = MultilingualSpeechRecognitionInputs + outputs_class: Type[TO] = MultilingualSpeechRecognitionOutputs + + +class ReferencelessTextGenerationMetricInputs(Inputs): + hypotheses: InputParam = None + sources: InputParam = None + score_identifier: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.TEXT, is_required=True) + self.sources = self.create_param(code="sources", data_type=DataType.TEXT, is_required=False) + self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True) + + +class ReferencelessTextGenerationMetricOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class ReferencelessTextGenerationMetric(AssetNode[ReferencelessTextGenerationMetricInputs, ReferencelessTextGenerationMetricOutputs]): + """ + The Referenceless Text Generation Metric is a method for evaluating the quality +of generated text without requiring a reference text for comparison, often +leveraging models or algorithms to assess coherence, relevance, and fluency +based on intrinsic properties of the text itself. + + InputType: text + OutputType: text + """ + function: str = "referenceless-text-generation-metric" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = ReferencelessTextGenerationMetricInputs + outputs_class: Type[TO] = ReferencelessTextGenerationMetricOutputs + + +class TextGenerationMetricDefaultInputs(Inputs): + hypotheses: InputParam = None + references: InputParam = None + sources: InputParam = None + score_identifier: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.TEXT, is_required=True) + self.references = self.create_param(code="references", data_type=DataType.TEXT, is_required=False) + self.sources = self.create_param(code="sources", data_type=DataType.TEXT, is_required=False) + self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True) + + +class TextGenerationMetricDefaultOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class TextGenerationMetricDefault(AssetNode[TextGenerationMetricDefaultInputs, TextGenerationMetricDefaultOutputs]): + """ + The "Text Generation Metric Default" function provides a standard set of +evaluation metrics for assessing the quality and performance of text generation +models. 
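+
+    A hedged sketch of feeding the metric (attribute access and all values
+    below are placeholders, not a confirmed API):
+
+        node = TextGenerationMetricDefault(asset_id="<metric-id>")  # hypothetical ctor
+        node.inputs.hypotheses.value = ["the cat sat on the mat"]  # required
+        node.inputs.sources.value = ["le chat s'est assis"]  # optional
+        node.inputs.score_identifier.value = "<score-name>"  # required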
+ + InputType: text + OutputType: text + """ + function: str = "text-generation-metric-default" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = TextGenerationMetricDefaultInputs + outputs_class: Type[TO] = TextGenerationMetricDefaultOutputs + + +class NoiseRemovalInputs(Inputs): + audio: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=False) + + +class NoiseRemovalOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.AUDIO) + + +class NoiseRemoval(AssetNode[NoiseRemovalInputs, NoiseRemovalOutputs]): + """ + Noise Removal is a process that involves identifying and eliminating unwanted +random variations or disturbances from an audio signal to enhance the clarity +and quality of the underlying information. + + InputType: audio + OutputType: audio + """ + function: str = "noise-removal" + input_type: str = DataType.AUDIO + output_type: str = DataType.AUDIO + + inputs_class: Type[TI] = NoiseRemovalInputs + outputs_class: Type[TO] = NoiseRemovalOutputs + + +class AudioReconstructionInputs(Inputs): + audio: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) + + +class AudioReconstructionOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.AUDIO) + + +class AudioReconstruction(BaseReconstructor[AudioReconstructionInputs, AudioReconstructionOutputs]): + """ + Audio Reconstruction is the process of restoring or recreating audio signals +from incomplete, damaged, or degraded recordings to achieve a high-quality, +accurate representation of the original sound. 
+ + InputType: audio + OutputType: audio + """ + function: str = "audio-reconstruction" + input_type: str = DataType.AUDIO + output_type: str = DataType.AUDIO + + inputs_class: Type[TI] = AudioReconstructionInputs + outputs_class: Type[TO] = AudioReconstructionOutputs + + +class VoiceCloningInputs(Inputs): + text: InputParam = None + audio: InputParam = None + language: InputParam = None + dialect: InputParam = None + voice: InputParam = None + script: InputParam = None + type: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.voice = self.create_param(code="voice", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.type = self.create_param(code="type", data_type=DataType.LABEL, is_required=False) + + +class VoiceCloningOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.AUDIO) + + +class VoiceCloning(AssetNode[VoiceCloningInputs, VoiceCloningOutputs]): + """ + Voice cloning is a technology that uses artificial intelligence to create a +digital replica of a person's voice, allowing for the generation of speech that +mimics the tone, pitch, and speaking style of the original speaker. + + InputType: text + OutputType: audio + """ + function: str = "voice-cloning" + input_type: str = DataType.TEXT + output_type: str = DataType.AUDIO + + inputs_class: Type[TI] = VoiceCloningInputs + outputs_class: Type[TO] = VoiceCloningOutputs + + +class DiacritizationInputs(Inputs): + language: InputParam = None + dialect: InputParam = None + script: InputParam = None + text: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=True) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + + +class DiacritizationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class Diacritization(AssetNode[DiacritizationInputs, DiacritizationOutputs]): + """ + Diacritization is the process of adding diacritical marks to letters in a text +to indicate pronunciation, stress, tone, or meaning, often used in languages +such as Arabic, Hebrew, and Vietnamese to provide clarity and accuracy in +written communication. 
+ + InputType: text + OutputType: text + """ + function: str = "diacritization" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = DiacritizationInputs + outputs_class: Type[TO] = DiacritizationOutputs + + +class AudioEmotionDetectionInputs(Inputs): + audio: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=False) + + +class AudioEmotionDetectionOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class AudioEmotionDetection(AssetNode[AudioEmotionDetectionInputs, AudioEmotionDetectionOutputs]): + """ + Audio Emotion Detection is a technology that analyzes vocal characteristics and +patterns in audio recordings to identify and classify the emotional state of +the speaker. + + InputType: audio + OutputType: label + """ + function: str = "audio-emotion-detection" + input_type: str = DataType.AUDIO + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = AudioEmotionDetectionInputs + outputs_class: Type[TO] = AudioEmotionDetectionOutputs + + +class TextSummarizationInputs(Inputs): + text: InputParam = None + language: InputParam = None + script: InputParam = None + dialect: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + + +class TextSummarizationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class TextSummarization(AssetNode[TextSummarizationInputs, TextSummarizationOutputs]): + """ + Text summarization is the process of condensing a large body of text into a +shorter version, capturing the main points and essential information while +maintaining coherence and meaning. 
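+
+    Sketch of the usual parameter split in this module: text and language are
+    required, while dialect and script are optional refinements (the attribute
+    access below is assumed, not a confirmed API):
+
+        node = TextSummarization(asset_id="<summarizer-id>")  # hypothetical ctor
+        node.inputs.text.value = "<long article text>"  # required
+        node.inputs.language.value = "en"  # required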
+ + InputType: text + OutputType: text + """ + function: str = "text-summarization" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = TextSummarizationInputs + outputs_class: Type[TO] = TextSummarizationOutputs + + +class EntityLinkingInputs(Inputs): + text: InputParam = None + language: InputParam = None + domain: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.domain = self.create_param(code="domain", data_type=DataType.LABEL, is_required=False) + + +class EntityLinkingOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class EntityLinking(AssetNode[EntityLinkingInputs, EntityLinkingOutputs]): + """ + Entity Linking is the process of identifying and connecting mentions of +entities within a text to their corresponding entries in a structured knowledge +base, thereby enabling the disambiguation of terms and enhancing the +understanding of the text's context. + + InputType: text + OutputType: label + """ + function: str = "entity-linking" + input_type: str = DataType.TEXT + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = EntityLinkingInputs + outputs_class: Type[TO] = EntityLinkingOutputs + + +class TextGenerationMetricInputs(Inputs): + hypotheses: InputParam = None + references: InputParam = None + sources: InputParam = None + score_identifier: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.TEXT, is_required=True) + self.references = self.create_param(code="references", data_type=DataType.TEXT, is_required=False) + self.sources = self.create_param(code="sources", data_type=DataType.TEXT, is_required=False) + self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True) + + +class TextGenerationMetricOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class TextGenerationMetric(AssetNode[TextGenerationMetricInputs, TextGenerationMetricOutputs]): + """ + A Text Generation Metric is a quantitative measure used to evaluate the quality +and effectiveness of text produced by natural language processing models, often +assessing aspects such as coherence, relevance, fluency, and adherence to given +prompts or instructions. 
+ + InputType: text + OutputType: text + """ + function: str = "text-generation-metric" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = TextGenerationMetricInputs + outputs_class: Type[TO] = TextGenerationMetricOutputs + + +class SplitOnLinebreakInputs(Inputs): + text: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + + +class SplitOnLinebreakOutputs(Outputs): + data: OutputParam = None + audio: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO) + + +class SplitOnLinebreak(BaseSegmentor[SplitOnLinebreakInputs, SplitOnLinebreakOutputs]): + """ + The "Split On Linebreak" function divides a given string into a list of +substrings, using linebreaks (newline characters) as the points of separation. + + InputType: text + OutputType: text + """ + function: str = "split-on-linebreak" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = SplitOnLinebreakInputs + outputs_class: Type[TO] = SplitOnLinebreakOutputs + + +class SentimentAnalysisInputs(Inputs): + text: InputParam = None + language: InputParam = None + dialect: InputParam = None + script: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + + +class SentimentAnalysisOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class SentimentAnalysis(AssetNode[SentimentAnalysisInputs, SentimentAnalysisOutputs]): + """ + Sentiment Analysis is a natural language processing technique used to determine +and classify the emotional tone or subjective information expressed in a piece +of text, such as identifying whether the sentiment is positive, negative, or +neutral. + + InputType: text + OutputType: label + """ + function: str = "sentiment-analysis" + input_type: str = DataType.TEXT + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = SentimentAnalysisInputs + outputs_class: Type[TO] = SentimentAnalysisOutputs + + +class KeywordSpottingInputs(Inputs): + audio: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=False) + + +class KeywordSpottingOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class KeywordSpotting(AssetNode[KeywordSpottingInputs, KeywordSpottingOutputs]): + """ + Keyword Spotting is a function that enables the detection and identification of +specific words or phrases within a stream of audio, often used in voice- +activated systems to trigger actions or commands based on recognized keywords. 
+ + InputType: audio + OutputType: label + """ + function: str = "keyword-spotting" + input_type: str = DataType.AUDIO + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = KeywordSpottingInputs + outputs_class: Type[TO] = KeywordSpottingOutputs + + +class TextClassificationInputs(Inputs): + text: InputParam = None + language: InputParam = None + dialect: InputParam = None + script: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + + +class TextClassificationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class TextClassification(AssetNode[TextClassificationInputs, TextClassificationOutputs]): + """ + Text Classification is a natural language processing task that involves +categorizing text into predefined labels or classes based on its content, +enabling automated organization, filtering, and analysis of large volumes of +textual data. + + InputType: text + OutputType: label + """ + function: str = "text-classification" + input_type: str = DataType.TEXT + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = TextClassificationInputs + outputs_class: Type[TO] = TextClassificationOutputs + + +class OtherMultipurposeInputs(Inputs): + text: InputParam = None + language: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + + +class OtherMultipurposeOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class OtherMultipurpose(AssetNode[OtherMultipurposeInputs, OtherMultipurposeOutputs]): + """ + The "Other (Multipurpose)" function serves as a versatile category designed to +accommodate a wide range of tasks and activities that do not fit neatly into +predefined classifications, offering flexibility and adaptability for various +needs. 
+ + InputType: text + OutputType: text + """ + function: str = "other-(multipurpose)" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = OtherMultipurposeInputs + outputs_class: Type[TO] = OtherMultipurposeOutputs + + +class SpeechSynthesisInputs(Inputs): + audio: InputParam = None + language: InputParam = None + dialect: InputParam = None + voice: InputParam = None + script: InputParam = None + text: InputParam = None + type: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=False) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.voice = self.create_param(code="voice", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.type = self.create_param(code="type", data_type=DataType.LABEL, is_required=False) + + +class SpeechSynthesisOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.AUDIO) + + +class SpeechSynthesis(AssetNode[SpeechSynthesisInputs, SpeechSynthesisOutputs]): + """ + Speech synthesis is the artificial production of human speech, typically +achieved through software or hardware systems that convert text into spoken +words, enabling machines to communicate verbally with users. + + InputType: text + OutputType: audio + """ + function: str = "speech-synthesis" + input_type: str = DataType.TEXT + output_type: str = DataType.AUDIO + + inputs_class: Type[TI] = SpeechSynthesisInputs + outputs_class: Type[TO] = SpeechSynthesisOutputs + + +class AudioIntentDetectionInputs(Inputs): + audio: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=False) + + +class AudioIntentDetectionOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class AudioIntentDetection(AssetNode[AudioIntentDetectionInputs, AudioIntentDetectionOutputs]): + """ + Audio Intent Detection is a process that involves analyzing audio signals to +identify and interpret the underlying intentions or purposes behind spoken +words, enabling systems to understand and respond appropriately to human +speech. 
+ + InputType: audio + OutputType: label + """ + function: str = "audio-intent-detection" + input_type: str = DataType.AUDIO + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = AudioIntentDetectionInputs + outputs_class: Type[TO] = AudioIntentDetectionOutputs + + +class VideoLabelDetectionInputs(Inputs): + video: InputParam = None + min_confidence: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=True) + self.min_confidence = self.create_param(code="min_confidence", data_type=DataType.TEXT, is_required=False) + + +class VideoLabelDetectionOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class VideoLabelDetection(AssetNode[VideoLabelDetectionInputs, VideoLabelDetectionOutputs]): + """ + Video Label Detection is a function that automatically identifies and tags +various objects, scenes, activities, and other relevant elements within a +video, providing descriptive labels that enhance searchability and content +organization. + + InputType: video + OutputType: label + """ + function: str = "video-label-detection" + input_type: str = DataType.VIDEO + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = VideoLabelDetectionInputs + outputs_class: Type[TO] = VideoLabelDetectionOutputs + + +class AsrQualityEstimationInputs(Inputs): + text: InputParam = None + script: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + + +class AsrQualityEstimationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class AsrQualityEstimation(AssetNode[AsrQualityEstimationInputs, AsrQualityEstimationOutputs]): + """ + ASR Quality Estimation is a process that evaluates the accuracy and reliability +of automatic speech recognition systems by analyzing their performance in +transcribing spoken language into text. 
+ + InputType: text + OutputType: label + """ + function: str = "asr-quality-estimation" + input_type: str = DataType.TEXT + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = AsrQualityEstimationInputs + outputs_class: Type[TO] = AsrQualityEstimationOutputs + + +class AudioTranscriptAnalysisInputs(Inputs): + language: InputParam = None + dialect: InputParam = None + source_supplier: InputParam = None + source_audio: InputParam = None + script: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.source_supplier = self.create_param(code="source_supplier", data_type=DataType.LABEL, is_required=False) + self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + + +class AudioTranscriptAnalysisOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class AudioTranscriptAnalysis(AssetNode[AudioTranscriptAnalysisInputs, AudioTranscriptAnalysisOutputs]): + """ + Audio Transcript Analysis is a process that involves converting spoken language +from audio recordings into written text, followed by examining and interpreting +the transcribed content to extract meaningful insights, identify patterns, and +derive actionable information. + + InputType: audio + OutputType: text + """ + function: str = "audio-transcript-analysis" + input_type: str = DataType.AUDIO + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = AudioTranscriptAnalysisInputs + outputs_class: Type[TO] = AudioTranscriptAnalysisOutputs + + +class SearchInputs(Inputs): + text: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + + +class SearchOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class Search(AssetNode[SearchInputs, SearchOutputs]): + """ + The "Search" function allows users to input keywords or phrases to quickly +locate specific information, files, or content within a database, website, or +application. 
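+
+    As a plain text-to-text node, Search needs only its single required
+    parameter; the attribute access below is an assumption based on this
+    module's inputs_class/outputs_class pattern, not a confirmed API:
+
+        node = Search(asset_id="<search-index-id>")  # hypothetical ctor
+        node.inputs.text.value = "pipeline designer documentation"
+        result_param = node.outputs.data  # declared TEXT output parameter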
+ + InputType: text + OutputType: text + """ + function: str = "search" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = SearchInputs + outputs_class: Type[TO] = SearchOutputs + + +class VideoForcedAlignmentInputs(Inputs): + video: InputParam = None + text: InputParam = None + language: InputParam = None + dialect: InputParam = None + script: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + + +class VideoForcedAlignmentOutputs(Outputs): + text: OutputParam = None + video: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT) + self.video = self.create_param(code="video", data_type=DataType.VIDEO) + + +class VideoForcedAlignment(AssetNode[VideoForcedAlignmentInputs, VideoForcedAlignmentOutputs]): + """ + Video Forced Alignment is a process that synchronizes video footage with +corresponding audio tracks by precisely aligning the visual and auditory +elements, ensuring that the movements of speakers' lips match the spoken words. + + InputType: video + OutputType: video + """ + function: str = "video-forced-alignment" + input_type: str = DataType.VIDEO + output_type: str = DataType.VIDEO + + inputs_class: Type[TI] = VideoForcedAlignmentInputs + outputs_class: Type[TO] = VideoForcedAlignmentOutputs + + +class VisemeGenerationInputs(Inputs): + text: InputParam = None + language: InputParam = None + dialect: InputParam = None + script: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + + +class VisemeGenerationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class VisemeGeneration(AssetNode[VisemeGenerationInputs, VisemeGenerationOutputs]): + """ + Viseme Generation is the process of creating visual representations of +phonemes, which are the distinct units of sound in speech, to synchronize lip +movements with spoken words in animations or virtual avatars. 
+ + InputType: text + OutputType: label + """ + function: str = "viseme-generation" + input_type: str = DataType.TEXT + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = VisemeGenerationInputs + outputs_class: Type[TO] = VisemeGenerationOutputs + + +class TopicClassificationInputs(Inputs): + text: InputParam = None + language: InputParam = None + script: InputParam = None + dialect: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + + +class TopicClassificationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class TopicClassification(AssetNode[TopicClassificationInputs, TopicClassificationOutputs]): + """ + Topic Classification is a natural language processing function that categorizes +text into predefined topics or subjects based on its content, enabling +efficient organization and retrieval of information. + + InputType: text + OutputType: label + """ + function: str = "topic-classification" + input_type: str = DataType.TEXT + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = TopicClassificationInputs + outputs_class: Type[TO] = TopicClassificationOutputs + + +class OffensiveLanguageIdentificationInputs(Inputs): + text: InputParam = None + language: InputParam = None + dialect: InputParam = None + script: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + + +class OffensiveLanguageIdentificationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class OffensiveLanguageIdentification(AssetNode[OffensiveLanguageIdentificationInputs, OffensiveLanguageIdentificationOutputs]): + """ + Offensive Language Identification is a function that analyzes text to detect +and flag language that is abusive, harmful, or inappropriate, helping to +maintain a respectful and safe communication environment. 
+
+    InputType: text
+    OutputType: label
+    """
+    function: str = "offensive-language-identification"
+    input_type: str = DataType.TEXT
+    output_type: str = DataType.LABEL
+
+    inputs_class: Type[TI] = OffensiveLanguageIdentificationInputs
+    outputs_class: Type[TO] = OffensiveLanguageIdentificationOutputs
+
+
+class SpeechTranslationInputs(Inputs):
+    source_audio: InputParam = None
+    sourcelanguage: InputParam = None
+    targetlanguage: InputParam = None
+    dialect: InputParam = None
+    voice: InputParam = None
+    script: InputParam = None
+
+    def __init__(self, node=None):
+        super().__init__(node=node)
+        self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True)
+        self.sourcelanguage = self.create_param(code="sourcelanguage", data_type=DataType.LABEL, is_required=True)
+        self.targetlanguage = self.create_param(code="targetlanguage", data_type=DataType.LABEL, is_required=True)
+        self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False)
+        self.voice = self.create_param(code="voice", data_type=DataType.LABEL, is_required=False)
+        self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False)
+
+
+class SpeechTranslationOutputs(Outputs):
+    data: OutputParam = None
+
+    def __init__(self, node=None):
+        super().__init__(node=node)
+        self.data = self.create_param(code="data", data_type=DataType.TEXT)
+
+
+class SpeechTranslation(AssetNode[SpeechTranslationInputs, SpeechTranslationOutputs]):
+    """
+    Speech Translation is a technology that converts spoken language in one
+language into text in another language, enabling communication between
+speakers of different languages.
+
+    InputType: audio
+    OutputType: text
+    """
+    function: str = "speech-translation"
+    input_type: str = DataType.AUDIO
+    output_type: str = DataType.TEXT
+
+    inputs_class: Type[TI] = SpeechTranslationInputs
+    outputs_class: Type[TO] = SpeechTranslationOutputs
+
+
+class SpeakerDiarizationAudioInputs(Inputs):
+    audio: InputParam = None
+    language: InputParam = None
+    script: InputParam = None
+    dialect: InputParam = None
+
+    def __init__(self, node=None):
+        super().__init__(node=node)
+        self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True)
+        self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False)
+        self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False)
+        self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False)
+
+
+class SpeakerDiarizationAudioOutputs(Outputs):
+    data: OutputParam = None
+    audio: OutputParam = None
+
+    def __init__(self, node=None):
+        super().__init__(node=node)
+        self.data = self.create_param(code="data", data_type=DataType.LABEL)
+        self.audio = self.create_param(code="audio", data_type=DataType.AUDIO)
+
+
+class SpeakerDiarizationAudio(BaseSegmentor[SpeakerDiarizationAudioInputs, SpeakerDiarizationAudioOutputs]):
+    """
+    Speaker Diarization Audio is a process that involves segmenting an audio
+recording into distinct sections, each corresponding to a different speaker, in
+order to identify and differentiate between multiple speakers within the same
+audio stream.
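+
+    Unlike most nodes in this module, this segmentor declares two outputs: a
+    LABEL stream of speaker turns and the segmented AUDIO. A hedged sketch
+    (attribute access assumed from the Outputs pattern, not a confirmed API):
+
+        node = SpeakerDiarizationAudio(asset_id="<diarization-model-id>")  # hypothetical ctor
+        node.inputs.audio.value = "https://example.com/meeting.wav"  # required
+        turns, segments = node.outputs.data, node.outputs.audio  # LABEL, AUDIO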
+ + InputType: audio + OutputType: label + """ + function: str = "speaker-diarization-audio" + input_type: str = DataType.AUDIO + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = SpeakerDiarizationAudioInputs + outputs_class: Type[TO] = SpeakerDiarizationAudioOutputs + + +class AudioTranscriptImprovementInputs(Inputs): + language: InputParam = None + dialect: InputParam = None + source_supplier: InputParam = None + is_medical: InputParam = None + source_audio: InputParam = None + script: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.source_supplier = self.create_param(code="source_supplier", data_type=DataType.LABEL, is_required=False) + self.is_medical = self.create_param(code="is_medical", data_type=DataType.TEXT, is_required=True) + self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + + +class AudioTranscriptImprovementOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class AudioTranscriptImprovement(AssetNode[AudioTranscriptImprovementInputs, AudioTranscriptImprovementOutputs]): + """ + Audio Transcript Improvement is a function that enhances the accuracy and +clarity of transcribed audio recordings by correcting errors, refining +language, and ensuring the text faithfully represents the original spoken +content. + + InputType: audio + OutputType: text + """ + function: str = "audio-transcript-improvement" + input_type: str = DataType.AUDIO + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = AudioTranscriptImprovementInputs + outputs_class: Type[TO] = AudioTranscriptImprovementOutputs + + +class SpeechNonSpeechClassificationInputs(Inputs): + audio: InputParam = None + language: InputParam = None + script: InputParam = None + dialect: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + + +class SpeechNonSpeechClassificationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class SpeechNonSpeechClassification(AssetNode[SpeechNonSpeechClassificationInputs, SpeechNonSpeechClassificationOutputs]): + """ + The function "Speech or Non-Speech Classification" is designed to analyze audio +input and determine whether the sound is human speech or non-speech noise, +enabling applications such as voice recognition systems to filter out +irrelevant background sounds. 
+
+    InputType: audio
+    OutputType: label
+    """
+    function: str = "speech-non-speech-classification"
+    input_type: str = DataType.AUDIO
+    output_type: str = DataType.LABEL
+
+    inputs_class: Type[TI] = SpeechNonSpeechClassificationInputs
+    outputs_class: Type[TO] = SpeechNonSpeechClassificationOutputs
+
+
+class TextDenormalizationInputs(Inputs):
+    text: InputParam = None
+    language: InputParam = None
+    lowercase_latin: InputParam = None
+    remove_accents: InputParam = None
+    remove_punctuation: InputParam = None
+
+    def __init__(self, node=None):
+        super().__init__(node=node)
+        self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True)
+        self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True)
+        self.lowercase_latin = self.create_param(code="lowercase_latin", data_type=DataType.TEXT, is_required=False)
+        self.remove_accents = self.create_param(code="remove_accents", data_type=DataType.TEXT, is_required=False)
+        self.remove_punctuation = self.create_param(code="remove_punctuation", data_type=DataType.TEXT, is_required=False)
+
+
+class TextDenormalizationOutputs(Outputs):
+    data: OutputParam = None
+
+    def __init__(self, node=None):
+        super().__init__(node=node)
+        self.data = self.create_param(code="data", data_type=DataType.LABEL)
+
+
+class TextDenormalization(AssetNode[TextDenormalizationInputs, TextDenormalizationOutputs]):
+    """
+    Text Denormalization is the process of stripping presentation detail from
+text, optionally lowercasing Latin characters, removing accents, and removing
+punctuation, to produce a simplified form that can be compared consistently in
+natural language processing tasks.
+
+    InputType: text
+    OutputType: label
+    """
+    function: str = "text-denormalization"
+    input_type: str = DataType.TEXT
+    output_type: str = DataType.LABEL
+
+    inputs_class: Type[TI] = TextDenormalizationInputs
+    outputs_class: Type[TO] = TextDenormalizationOutputs
+
+
+class ImageContentModerationInputs(Inputs):
+    image: InputParam = None
+    min_confidence: InputParam = None
+
+    def __init__(self, node=None):
+        super().__init__(node=node)
+        self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True)
+        self.min_confidence = self.create_param(code="min_confidence", data_type=DataType.TEXT, is_required=False)
+
+
+class ImageContentModerationOutputs(Outputs):
+    data: OutputParam = None
+
+    def __init__(self, node=None):
+        super().__init__(node=node)
+        self.data = self.create_param(code="data", data_type=DataType.LABEL)
+
+
+class ImageContentModeration(AssetNode[ImageContentModerationInputs, ImageContentModerationOutputs]):
+    """
+    Image Content Moderation is a process that involves analyzing and filtering
+images to detect and manage inappropriate, harmful, or sensitive content,
+ensuring compliance with community guidelines and legal standards.
+ + InputType: image + OutputType: label + """ + function: str = "image-content-moderation" + input_type: str = DataType.IMAGE + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = ImageContentModerationInputs + outputs_class: Type[TO] = ImageContentModerationOutputs + + +class ReferencelessTextGenerationMetricDefaultInputs(Inputs): + hypotheses: InputParam = None + sources: InputParam = None + score_identifier: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.TEXT, is_required=True) + self.sources = self.create_param(code="sources", data_type=DataType.TEXT, is_required=False) + self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True) + + +class ReferencelessTextGenerationMetricDefaultOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class ReferencelessTextGenerationMetricDefault(AssetNode[ReferencelessTextGenerationMetricDefaultInputs, ReferencelessTextGenerationMetricDefaultOutputs]): + """ + The Referenceless Text Generation Metric Default is a function designed to +evaluate the quality of generated text without relying on reference texts for +comparison. + + InputType: text + OutputType: text + """ + function: str = "referenceless-text-generation-metric-default" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = ReferencelessTextGenerationMetricDefaultInputs + outputs_class: Type[TO] = ReferencelessTextGenerationMetricDefaultOutputs + + +class NamedEntityRecognitionInputs(Inputs): + text: InputParam = None + language: InputParam = None + dialect: InputParam = None + script: InputParam = None + domain: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.domain = self.create_param(code="domain", data_type=DataType.LABEL, is_required=False) + + +class NamedEntityRecognitionOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class NamedEntityRecognition(AssetNode[NamedEntityRecognitionInputs, NamedEntityRecognitionOutputs]): + """ + Named Entity Recognition (NER) is a natural language processing task that +involves identifying and classifying proper nouns in text into predefined +categories such as names of people, organizations, locations, dates, and other +entities. 
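+
+    A short sketch of domain-scoped extraction (constructor and attribute
+    access are assumptions, not a confirmed API; all values are placeholders):
+
+        node = NamedEntityRecognition(asset_id="<ner-model-id>")  # hypothetical ctor
+        node.inputs.text.value = "Acme Corp. hired Jane Doe in Paris."  # required
+        node.inputs.domain.value = "business"  # optional narrowing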
+ + InputType: text + OutputType: label + """ + function: str = "named-entity-recognition" + input_type: str = DataType.TEXT + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = NamedEntityRecognitionInputs + outputs_class: Type[TO] = NamedEntityRecognitionOutputs + + +class TextContentModerationInputs(Inputs): + text: InputParam = None + language: InputParam = None + dialect: InputParam = None + script: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + + +class TextContentModerationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class TextContentModeration(AssetNode[TextContentModerationInputs, TextContentModerationOutputs]): + """ + Text Content Moderation is the process of reviewing, filtering, and managing +user-generated content to ensure it adheres to community guidelines, legal +standards, and platform policies, thereby maintaining a safe and respectful +online environment. + + InputType: text + OutputType: label + """ + function: str = "text-content-moderation" + input_type: str = DataType.TEXT + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = TextContentModerationInputs + outputs_class: Type[TO] = TextContentModerationOutputs + + +class SpeakerDiarizationVideoInputs(Inputs): + video: InputParam = None + language: InputParam = None + script: InputParam = None + dialect: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + + +class SpeakerDiarizationVideoOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.VIDEO) + + +class SpeakerDiarizationVideo(AssetNode[SpeakerDiarizationVideoInputs, SpeakerDiarizationVideoOutputs]): + """ + The Speaker Diarization Video function identifies and segments different +speakers in a video, attributing portions of the audio to individual speakers +to facilitate analysis and understanding of multi-speaker conversations. 
+ + InputType: video + OutputType: label + """ + function: str = "speaker-diarization-video" + input_type: str = DataType.VIDEO + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = SpeakerDiarizationVideoInputs + outputs_class: Type[TO] = SpeakerDiarizationVideoOutputs + + +class SplitOnSilenceInputs(Inputs): + audio: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) + + +class SplitOnSilenceOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.AUDIO) + + +class SplitOnSilence(AssetNode[SplitOnSilenceInputs, SplitOnSilenceOutputs]): + """ + The "Split On Silence" function divides an audio recording into separate +segments based on periods of silence, allowing for easier editing and analysis +of individual sections. + + InputType: audio + OutputType: audio + """ + function: str = "split-on-silence" + input_type: str = DataType.AUDIO + output_type: str = DataType.AUDIO + + inputs_class: Type[TI] = SplitOnSilenceInputs + outputs_class: Type[TO] = SplitOnSilenceOutputs + + +class EmotionDetectionInputs(Inputs): + text: InputParam = None + language: InputParam = None + dialect: InputParam = None + script: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + + +class EmotionDetectionOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class EmotionDetection(AssetNode[EmotionDetectionInputs, EmotionDetectionOutputs]): + """ + Emotion Detection is a process that involves analyzing text to identify and +categorize the emotional states or sentiments expressed by individuals, such as +happiness, sadness, anger, or fear. 
+ + InputType: text + OutputType: label + """ + function: str = "emotion-detection" + input_type: str = DataType.TEXT + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = EmotionDetectionInputs + outputs_class: Type[TO] = EmotionDetectionOutputs + + +class TextSpamDetectionInputs(Inputs): + text: InputParam = None + language: InputParam = None + dialect: InputParam = None + script: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + + +class TextSpamDetectionOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class TextSpamDetection(AssetNode[TextSpamDetectionInputs, TextSpamDetectionOutputs]): + """ + Text Spam Detection is a process that involves analyzing and identifying +unsolicited or irrelevant messages within text communications, typically using +algorithms and machine learning techniques to filter out spam and ensure the +integrity of the communication platform. + + InputType: text + OutputType: label + """ + function: str = "text-spam-detection" + input_type: str = DataType.TEXT + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = TextSpamDetectionInputs + outputs_class: Type[TO] = TextSpamDetectionOutputs + + +class TranslationInputs(Inputs): + text: InputParam = None + sourcelanguage: InputParam = None + targetlanguage: InputParam = None + script_in: InputParam = None + script_out: InputParam = None + dialect_in: InputParam = None + dialect_out: InputParam = None + context: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.sourcelanguage = self.create_param(code="sourcelanguage", data_type=DataType.LABEL, is_required=True) + self.targetlanguage = self.create_param(code="targetlanguage", data_type=DataType.LABEL, is_required=True) + self.script_in = self.create_param(code="script_in", data_type=DataType.LABEL, is_required=False) + self.script_out = self.create_param(code="script_out", data_type=DataType.LABEL, is_required=False) + self.dialect_in = self.create_param(code="dialect_in", data_type=DataType.LABEL, is_required=False) + self.dialect_out = self.create_param(code="dialect_out", data_type=DataType.LABEL, is_required=False) + self.context = self.create_param(code="context", data_type=DataType.LABEL, is_required=False) + + +class TranslationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class Translation(AssetNode[TranslationInputs, TranslationOutputs]): + """ + Translation is the process of converting text from one language into an +equivalent text in another language, preserving the original meaning and +context. 
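+
+    A speculative chaining sketch (the link call and its signature are
+    assumptions about this designer's node pattern, not a confirmed API):
+
+        asr = SpeechRecognition(asset_id="<asr-model-id>")  # hypothetical ctor
+        mt = Translation(asset_id="<mt-model-id>")  # hypothetical ctor
+        mt.inputs.sourcelanguage.value = "en"  # required
+        mt.inputs.targetlanguage.value = "fr"  # required
+        asr.link(mt, from_param="data", to_param="text")  # assumed signature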
+ + InputType: text + OutputType: text + """ + function: str = "translation" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = TranslationInputs + outputs_class: Type[TO] = TranslationOutputs + + +class VoiceActivityDetectionInputs(Inputs): + audio: InputParam = None + onset: InputParam = None + offset: InputParam = None + min_duration_on: InputParam = None + min_duration_off: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) + self.onset = self.create_param(code="onset", data_type=DataType.TEXT, is_required=False) + self.offset = self.create_param(code="offset", data_type=DataType.TEXT, is_required=False) + self.min_duration_on = self.create_param(code="min_duration_on", data_type=DataType.TEXT, is_required=False) + self.min_duration_off = self.create_param(code="min_duration_off", data_type=DataType.TEXT, is_required=False) + + +class VoiceActivityDetectionOutputs(Outputs): + data: OutputParam = None + audio: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.AUDIO) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO) + + +class VoiceActivityDetection(BaseSegmentor[VoiceActivityDetectionInputs, VoiceActivityDetectionOutputs]): + """ + Voice Activity Detection (VAD) is a technology that identifies the presence or +absence of human speech within an audio signal, enabling systems to distinguish +between spoken words and background noise. + + InputType: audio + OutputType: audio + """ + function: str = "voice-activity-detection" + input_type: str = DataType.AUDIO + output_type: str = DataType.AUDIO + + inputs_class: Type[TI] = VoiceActivityDetectionInputs + outputs_class: Type[TO] = VoiceActivityDetectionOutputs + + +class SpeechEmbeddingInputs(Inputs): + audio: InputParam = None + language: InputParam = None + dialect: InputParam = None + script: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + + +class SpeechEmbeddingOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class SpeechEmbedding(AssetNode[SpeechEmbeddingInputs, SpeechEmbeddingOutputs]): + """ + Speech Embedding is a process that transforms spoken language into a fixed- +dimensional vector representation, capturing essential features and +characteristics of the speech for tasks such as recognition, classification, +and analysis. 
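+
+    Note that the embedding is carried on a TEXT-typed output parameter (a
+    serialized representation). Sketch with assumed attribute access (not a
+    confirmed API):
+
+        node = SpeechEmbedding(asset_id="<embedding-model-id>")  # hypothetical ctor
+        node.inputs.audio.value = "https://example.com/utterance.wav"  # required
+        node.inputs.language.value = "en"  # required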
+ + InputType: audio + OutputType: text + """ + function: str = "speech-embedding" + input_type: str = DataType.AUDIO + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = SpeechEmbeddingInputs + outputs_class: Type[TO] = SpeechEmbeddingOutputs + + +class SubtitlingTranslationInputs(Inputs): + text: InputParam = None + sourcelanguage: InputParam = None + dialect_in: InputParam = None + target_supplier: InputParam = None + targetlanguages: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.sourcelanguage = self.create_param(code="sourcelanguage", data_type=DataType.LABEL, is_required=True) + self.dialect_in = self.create_param(code="dialect_in", data_type=DataType.LABEL, is_required=False) + self.target_supplier = self.create_param(code="target_supplier", data_type=DataType.LABEL, is_required=False) + self.targetlanguages = self.create_param(code="targetlanguages", data_type=DataType.LABEL, is_required=False) + + +class SubtitlingTranslationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class SubtitlingTranslation(AssetNode[SubtitlingTranslationInputs, SubtitlingTranslationOutputs]): + """ + Subtitling Translation is the process of converting spoken dialogue from one +language into written text in another language, which is then displayed on- +screen to aid viewers in understanding the content. + + InputType: text + OutputType: text + """ + function: str = "subtitling-translation" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = SubtitlingTranslationInputs + outputs_class: Type[TO] = SubtitlingTranslationOutputs + + +class TextGenerationInputs(Inputs): + text: InputParam = None + prompt: InputParam = None + context: InputParam = None + language: InputParam = None + script: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.prompt = self.create_param(code="prompt", data_type=DataType.TEXT, is_required=False) + self.context = self.create_param(code="context", data_type=DataType.TEXT, is_required=False) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + + +class TextGenerationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class TextGeneration(AssetNode[TextGenerationInputs, TextGenerationOutputs]): + """ + Text Generation is a process in which artificial intelligence models, such as +neural networks, produce coherent and contextually relevant text based on a +given input or prompt, often mimicking human writing styles and patterns. 
+ + InputType: text + OutputType: text + """ + function: str = "text-generation" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = TextGenerationInputs + outputs_class: Type[TO] = TextGenerationOutputs + + +class VideoUnderstandingInputs(Inputs): + video: InputParam = None + text: InputParam = None + language: InputParam = None + dialect: InputParam = None + script: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + + +class VideoUnderstandingOutputs(Outputs): + text: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT) + + +class VideoUnderstanding(AssetNode[VideoUnderstandingInputs, VideoUnderstandingOutputs]): + """ + Video Understanding is the process of analyzing and interpreting video content +to extract meaningful information, such as identifying objects, actions, +events, and contextual relationships within the footage. + + InputType: video + OutputType: text + """ + function: str = "video-understanding" + input_type: str = DataType.VIDEO + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = VideoUnderstandingInputs + outputs_class: Type[TO] = VideoUnderstandingOutputs + + +class TextToVideoGenerationInputs(Inputs): + text: InputParam = None + language: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) + + +class TextToVideoGenerationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.VIDEO) + + +class TextToVideoGeneration(AssetNode[TextToVideoGenerationInputs, TextToVideoGenerationOutputs]): + """ + Text To Video Generation is a process that converts written descriptions or +scripts into dynamic, visual video content using advanced algorithms and +artificial intelligence. 
+ + InputType: text + OutputType: video + """ + function: str = "text-to-video-generation" + input_type: str = DataType.TEXT + output_type: str = DataType.VIDEO + + inputs_class: Type[TI] = TextToVideoGenerationInputs + outputs_class: Type[TO] = TextToVideoGenerationOutputs + + +class TextNormalizationInputs(Inputs): + text: InputParam = None + language: InputParam = None + settings: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) + self.settings = self.create_param(code="settings", data_type=DataType.TEXT, is_required=False) + + +class TextNormalizationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + + +class TextNormalization(AssetNode[TextNormalizationInputs, TextNormalizationOutputs]): + """ + Text normalization is the process of transforming text into a standard, +consistent format by correcting spelling errors, converting all characters to a +uniform case, removing punctuation, and expanding abbreviations to improve the +text's readability and usability for further processing or analysis. + + InputType: text + OutputType: label + """ + function: str = "text-normalization" + input_type: str = DataType.TEXT + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = TextNormalizationInputs + outputs_class: Type[TO] = TextNormalizationOutputs + + +class SpeechRecognitionInputs(Inputs): + language: InputParam = None + dialect: InputParam = None + voice: InputParam = None + source_audio: InputParam = None + script: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.voice = self.create_param(code="voice", data_type=DataType.LABEL, is_required=False) + self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + + +class SpeechRecognitionOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class SpeechRecognition(AssetNode[SpeechRecognitionInputs, SpeechRecognitionOutputs]): + """ + Speech recognition is a technology that enables a computer or device to +identify and process spoken language, converting it into text. 
+ + InputType: audio + OutputType: text + """ + function: str = "speech-recognition" + input_type: str = DataType.AUDIO + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = SpeechRecognitionInputs + outputs_class: Type[TO] = SpeechRecognitionOutputs + + +class SubtitlingInputs(Inputs): + source_audio: InputParam = None + sourcelanguage: InputParam = None + dialect_in: InputParam = None + source_supplier: InputParam = None + target_supplier: InputParam = None + targetlanguages: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) + self.sourcelanguage = self.create_param(code="sourcelanguage", data_type=DataType.LABEL, is_required=True) + self.dialect_in = self.create_param(code="dialect_in", data_type=DataType.LABEL, is_required=False) + self.source_supplier = self.create_param(code="source_supplier", data_type=DataType.LABEL, is_required=False) + self.target_supplier = self.create_param(code="target_supplier", data_type=DataType.LABEL, is_required=False) + self.targetlanguages = self.create_param(code="targetlanguages", data_type=DataType.LABEL, is_required=False) + + +class SubtitlingOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class Subtitling(AssetNode[SubtitlingInputs, SubtitlingOutputs]): + """ + Subtitling is the process of displaying written text on a screen to represent +the spoken dialogue, narration, or other audio elements in a video, typically +to aid viewers who are deaf or hard of hearing, or to provide translations for +audiences who speak different languages. + + InputType: audio + OutputType: text + """ + function: str = "subtitling" + input_type: str = DataType.AUDIO + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = SubtitlingInputs + outputs_class: Type[TO] = SubtitlingOutputs + + +class ClassificationMetricInputs(Inputs): + hypotheses: InputParam = None + references: InputParam = None + lowerIsBetter: InputParam = None + sources: InputParam = None + score_identifier: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.LABEL, is_required=True) + self.references = self.create_param(code="references", data_type=DataType.LABEL, is_required=True) + self.lowerIsBetter = self.create_param(code="lowerIsBetter", data_type=DataType.TEXT, is_required=False) + self.sources = self.create_param(code="sources", data_type=DataType.TEXT, is_required=False) + self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True) + + +class ClassificationMetricOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.NUMBER) + + +class ClassificationMetric(AssetNode[ClassificationMetricInputs, ClassificationMetricOutputs]): + """ + A Classification Metric is a quantitative measure used to evaluate the quality +and effectiveness of classification models. 
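+
+    A minimal wiring sketch, following the same conventions as the functional
+    tests in this patch (the asset ID is a placeholder; the required
+    `references` and `score_identifier` inputs would be wired or populated the
+    same way as `hypotheses`):
+
+        pipeline = PipelineFactory.init(name="classification-metric-demo")
+        metric = pipeline.classification_metric(asset_id="<METRIC_ASSET_ID>")
+        input = pipeline.input()
+        input.outputs.input.link(metric.inputs.hypotheses)
+        metric.use_output("data")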
+ + InputType: text + OutputType: text + """ + function: str = "classification-metric" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT + + inputs_class: Type[TI] = ClassificationMetricInputs + outputs_class: Type[TO] = ClassificationMetricOutputs + + +class TextToImageGenerationInputs(Inputs): + text: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + + +class TextToImageGenerationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.IMAGE) + + +class TextToImageGeneration(AssetNode[TextToImageGenerationInputs, TextToImageGenerationOutputs]): + """ + Text To Image Generation is a process where a system creates visual images +based on descriptive text input, translating written language into +corresponding graphical representations. + + InputType: text + OutputType: image + """ + function: str = "text-to-image-generation" + input_type: str = DataType.TEXT + output_type: str = DataType.IMAGE + + inputs_class: Type[TI] = TextToImageGenerationInputs + outputs_class: Type[TO] = TextToImageGenerationOutputs + + + +class Pipeline(DefaultPipeline): + + def object_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ObjectDetection: + """ + Object Detection is a computer vision technology that identifies and locates +objects within an image, typically by drawing bounding boxes around the +detected objects and classifying them into predefined categories. + """ + return ObjectDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def language_identification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> LanguageIdentification: + """ + Language Identification is the process of automatically determining the +language in which a given piece of text is written. + """ + return LanguageIdentification(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def ocr(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Ocr: + """ + OCR, or Optical Character Recognition, is a technology that converts different +types of documents, such as scanned paper documents, PDFs, or images captured +by a digital camera, into editable and searchable data by recognizing and +extracting text from the images. + """ + return Ocr(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def script_execution(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ScriptExecution: + """ + Script Execution refers to the process of running a set of programmed +instructions or code within a computing environment, enabling the automated +performance of tasks, calculations, or operations as defined by the script. + """ + return ScriptExecution(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def image_label_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageLabelDetection: + """ + Image Label Detection is a function that automatically identifies and assigns +descriptive tags or labels to objects, scenes, or elements within an image, +enabling easier categorization, search, and analysis of visual content. 
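+
+        A minimal end-to-end sketch of the factory-method style shared by every
+        method in this class (the asset ID, the `image` input name, and the
+        sample URL are assumptions for illustration):
+
+            pipeline = PipelineFactory.init(name="labeling-demo")
+            input = pipeline.input()
+            labeler = pipeline.image_label_detection(asset_id="<LABELING_ASSET_ID>")
+            input.outputs.input.link(labeler.inputs.image)
+            labeler.use_output("data")
+            pipeline.save()
+            output = pipeline.run("https://example.com/sample.png")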
+ """ + return ImageLabelDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def image_captioning(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageCaptioning: + """ + Image Captioning is a process that involves generating a textual description of +an image, typically using machine learning models to analyze the visual content +and produce coherent and contextually relevant sentences that describe the +objects, actions, and scenes depicted in the image. + """ + return ImageCaptioning(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def audio_language_identification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioLanguageIdentification: + """ + Audio Language Identification is a process that involves analyzing an audio +recording to determine the language being spoken. + """ + return AudioLanguageIdentification(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def asr_age_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AsrAgeClassification: + """ + The ASR Age Classification function is designed to analyze audio recordings of +speech to determine the speaker's age group by leveraging automatic speech +recognition (ASR) technology and machine learning algorithms. + """ + return AsrAgeClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def benchmark_scoring_mt(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> BenchmarkScoringMt: + """ + Benchmark Scoring MT is a function designed to evaluate and score machine +translation systems by comparing their output against a set of predefined +benchmarks, thereby assessing their accuracy and performance. + """ + return BenchmarkScoringMt(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def asr_gender_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AsrGenderClassification: + """ + The ASR Gender Classification function analyzes audio recordings to determine +and classify the speaker's gender based on their voice characteristics. + """ + return AsrGenderClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def base_model(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> BaseModel: + """ + The Base-Model function serves as a foundational framework designed to provide +essential features and capabilities upon which more specialized or advanced +models can be built and customized. + """ + return BaseModel(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def language_identification_audio(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> LanguageIdentificationAudio: + """ + The Language Identification Audio function analyzes audio input to determine +and identify the language being spoken. + """ + return LanguageIdentificationAudio(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def loglikelihood(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Loglikelihood: + """ + The Log Likelihood function measures the probability of observing the given +data under a specific statistical model by taking the natural logarithm of the +likelihood function, thereby transforming the product of probabilities into a +sum, which simplifies the process of optimization and parameter estimation. 
+ """ + return Loglikelihood(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def video_embedding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoEmbedding: + """ + Video Embedding is a process that transforms video content into a fixed- +dimensional vector representation, capturing essential features and patterns to +facilitate tasks such as retrieval, classification, and recommendation. + """ + return VideoEmbedding(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def text_segmenation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextSegmenation: + """ + Text Segmentation is the process of dividing a continuous text into meaningful +units, such as words, sentences, or topics, to facilitate easier analysis and +understanding. + """ + return TextSegmenation(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def image_embedding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageEmbedding: + """ + Image Embedding is a process that transforms an image into a fixed-dimensional +vector representation, capturing its essential features and enabling efficient +comparison, retrieval, and analysis in various machine learning and computer +vision tasks. + """ + return ImageEmbedding(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def image_manipulation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageManipulation: + """ + Image Manipulation refers to the process of altering or enhancing digital +images using various techniques and tools to achieve desired visual effects, +correct imperfections, or transform the image's appearance. + """ + return ImageManipulation(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def image_to_video_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageToVideoGeneration: + """ + The Image To Video Generation function transforms a series of static images +into a cohesive, dynamic video sequence, often incorporating transitions, +effects, and synchronization with audio to create a visually engaging +narrative. + """ + return ImageToVideoGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def audio_forced_alignment(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioForcedAlignment: + """ + Audio Forced Alignment is a process that synchronizes a given audio recording +with its corresponding transcript by precisely aligning each spoken word or +phoneme to its exact timing within the audio. + """ + return AudioForcedAlignment(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def benchmark_scoring_asr(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> BenchmarkScoringAsr: + """ + Benchmark Scoring ASR is a function that evaluates and compares the performance +of automatic speech recognition systems by analyzing their accuracy, speed, and +other relevant metrics against a standardized set of benchmarks. + """ + return BenchmarkScoringAsr(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def visual_question_answering(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VisualQuestionAnswering: + """ + Visual Question Answering (VQA) is a task in artificial intelligence that +involves analyzing an image and providing accurate, contextually relevant +answers to questions posed about the visual content of that image. 
+ """ + return VisualQuestionAnswering(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def document_image_parsing(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> DocumentImageParsing: + """ + Document Image Parsing is the process of analyzing and converting scanned or +photographed images of documents into structured, machine-readable formats by +identifying and extracting text, layout, and other relevant information. + """ + return DocumentImageParsing(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def document_information_extraction(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> DocumentInformationExtraction: + """ + Document Information Extraction is the process of automatically identifying, +extracting, and structuring relevant data from unstructured or semi-structured +documents, such as invoices, receipts, contracts, and forms, to facilitate +easier data management and analysis. + """ + return DocumentInformationExtraction(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def depth_estimation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> DepthEstimation: + """ + Depth estimation is a computational process that determines the distance of +objects from a viewpoint, typically using visual data from cameras or sensors +to create a three-dimensional understanding of a scene. + """ + return DepthEstimation(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def video_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoGeneration: + """ + Video Generation is the process of creating video content through automated or +semi-automated means, often utilizing algorithms, artificial intelligence, or +software tools to produce visual and audio elements that can range from simple +animations to complex, realistic scenes. + """ + return VideoGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def referenceless_audio_generation_metric(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ReferencelessAudioGenerationMetric: + """ + The Referenceless Audio Generation Metric is a tool designed to evaluate the +quality of generated audio content without the need for a reference or original +audio sample for comparison. + """ + return ReferencelessAudioGenerationMetric(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def multi_class_image_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> MultiClassImageClassification: + """ + Multi Class Image Classification is a machine learning task where an algorithm +is trained to categorize images into one of several predefined classes or +categories based on their visual content. + """ + return MultiClassImageClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def semantic_segmentation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SemanticSegmentation: + """ + Semantic segmentation is a computer vision process that involves classifying +each pixel in an image into a predefined category, effectively partitioning the +image into meaningful segments based on the objects or regions they represent. 
+ """ + return SemanticSegmentation(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def instance_segmentation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> InstanceSegmentation: + """ + Instance segmentation is a computer vision task that involves detecting and +delineating each distinct object within an image, assigning a unique label and +precise boundary to every individual instance of objects, even if they belong +to the same category. + """ + return InstanceSegmentation(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def image_colorization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageColorization: + """ + Image colorization is a process that involves adding color to grayscale images, +transforming them from black-and-white to full-color representations, often +using advanced algorithms and machine learning techniques to predict and apply +the appropriate hues and shades. + """ + return ImageColorization(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def audio_generation_metric(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioGenerationMetric: + """ + The Audio Generation Metric is a quantitative measure used to evaluate the +quality, accuracy, and overall performance of audio generated by artificial +intelligence systems, often considering factors such as fidelity, +intelligibility, and similarity to human-produced audio. + """ + return AudioGenerationMetric(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def image_impainting(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageImpainting: + """ + Image inpainting is a process that involves filling in missing or damaged parts +of an image in a way that is visually coherent and seamlessly blends with the +surrounding areas, often using advanced algorithms and techniques to restore +the image to its original or intended appearance. + """ + return ImageImpainting(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def style_transfer(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> StyleTransfer: + """ + Style Transfer is a technique in artificial intelligence that applies the +visual style of one image (such as the brushstrokes of a famous painting) to +the content of another image, effectively blending the artistic elements of the +first image with the subject matter of the second. + """ + return StyleTransfer(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def multi_class_text_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> MultiClassTextClassification: + """ + Multi Class Text Classification is a natural language processing task that +involves categorizing a given text into one of several predefined classes or +categories based on its content. + """ + return MultiClassTextClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def text_embedding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextEmbedding: + """ + Text embedding is a process that converts text into numerical vectors, +capturing the semantic meaning and contextual relationships of words or +phrases, enabling machines to understand and analyze natural language more +effectively. 
+ """ + return TextEmbedding(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def multi_label_text_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> MultiLabelTextClassification: + """ + Multi Label Text Classification is a natural language processing task where a +given text is analyzed and assigned multiple relevant labels or categories from +a predefined set, allowing for the text to belong to more than one category +simultaneously. + """ + return MultiLabelTextClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def text_reconstruction(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextReconstruction: + """ + Text Reconstruction is a process that involves piecing together fragmented or +incomplete text data to restore it to its original, coherent form. + """ + return TextReconstruction(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def fact_checking(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> FactChecking: + """ + Fact Checking is the process of verifying the accuracy and truthfulness of +information, statements, or claims by cross-referencing with reliable sources +and evidence. + """ + return FactChecking(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def speech_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechClassification: + """ + Speech Classification is a process that involves analyzing and categorizing +spoken language into predefined categories or classes based on various features +such as tone, pitch, and linguistic content. + """ + return SpeechClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def intent_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> IntentClassification: + """ + Intent Classification is a natural language processing task that involves +analyzing and categorizing user text input to determine the underlying purpose +or goal behind the communication, such as booking a flight, asking for weather +information, or setting a reminder. + """ + return IntentClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def part_of_speech_tagging(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> PartOfSpeechTagging: + """ + Part of Speech Tagging is a natural language processing task that involves +assigning each word in a sentence its corresponding part of speech, such as +noun, verb, adjective, or adverb, based on its role and context within the +sentence. + """ + return PartOfSpeechTagging(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def metric_aggregation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> MetricAggregation: + """ + Metric Aggregation is a function that computes and summarizes numerical data by +applying statistical operations, such as averaging, summing, or finding the +minimum and maximum values, to provide insights and facilitate analysis of +large datasets. + """ + return MetricAggregation(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def dialect_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> DialectDetection: + """ + Dialect Detection is a function that identifies and classifies the specific +regional or social variations of a language spoken or written by an individual, +enabling the recognition of distinct linguistic patterns and nuances associated +with different dialects. 
+    """
+        return DialectDetection(*args, asset_id=asset_id, pipeline=self, **kwargs)
+
+    def inverse_text_normalization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> InverseTextNormalization:
+        """
+        Inverse Text Normalization is the process of converting spoken or written
+language from its normalized form, such as numbers, dates, and abbreviations,
+back into its original, more complex or detailed textual representation.
+        """
+        return InverseTextNormalization(*args, asset_id=asset_id, pipeline=self, **kwargs)
+
+    def text_to_audio(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextToAudio:
+        """
+        The Text to Audio function converts written text into spoken words, allowing
+users to listen to the content instead of reading it.
+        """
+        return TextToAudio(*args, asset_id=asset_id, pipeline=self, **kwargs)
+
+    def fill_text_mask(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> FillTextMask:
+        """
+        The "Fill Text Mask" function takes a text input with masked or placeholder
+characters and replaces those placeholders with specified or contextually
+appropriate characters to generate a complete and coherent text output.
+        """
+        return FillTextMask(*args, asset_id=asset_id, pipeline=self, **kwargs)
+
+    def video_content_moderation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoContentModeration:
+        """
+        Video Content Moderation is the process of reviewing, analyzing, and filtering
+video content to ensure it adheres to community guidelines, legal standards,
+and platform policies, thereby preventing the dissemination of inappropriate,
+harmful, or illegal material.
+        """
+        return VideoContentModeration(*args, asset_id=asset_id, pipeline=self, **kwargs)
+
+    def extract_audio_from_video(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ExtractAudioFromVideo:
+        """
+        The "Extract Audio From Video" function allows users to separate and save the
+audio track from a video file, enabling them to obtain just the sound without
+the accompanying visual content.
+        """
+        return ExtractAudioFromVideo(*args, asset_id=asset_id, pipeline=self, **kwargs)
+
+    def image_compression(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageCompression:
+        """
+        Image compression is a process that reduces the file size of an image by
+removing redundant or non-essential data, while maintaining an acceptable level
+of visual quality.
+        """
+        return ImageCompression(*args, asset_id=asset_id, pipeline=self, **kwargs)
+
+    def multilingual_speech_recognition(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> MultilingualSpeechRecognition:
+        """
+        Multilingual Speech Recognition is a technology that enables the automatic
+transcription of spoken language into text across multiple languages, allowing
+for seamless communication and understanding in diverse linguistic contexts.
+        """
+        return MultilingualSpeechRecognition(*args, asset_id=asset_id, pipeline=self, **kwargs)
+
+    def referenceless_text_generation_metric(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ReferencelessTextGenerationMetric:
+        """
+        The Referenceless Text Generation Metric is a method for evaluating the quality
+of generated text without requiring a reference text for comparison, often
+leveraging models or algorithms to assess coherence, relevance, and fluency
+based on intrinsic properties of the text itself.
+ """ + return ReferencelessTextGenerationMetric(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def text_generation_metric_default(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextGenerationMetricDefault: + """ + The "Text Generation Metric Default" function provides a standard set of +evaluation metrics for assessing the quality and performance of text generation +models. + """ + return TextGenerationMetricDefault(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def noise_removal(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> NoiseRemoval: + """ + Noise Removal is a process that involves identifying and eliminating unwanted +random variations or disturbances from an audio signal to enhance the clarity +and quality of the underlying information. + """ + return NoiseRemoval(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def audio_reconstruction(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioReconstruction: + """ + Audio Reconstruction is the process of restoring or recreating audio signals +from incomplete, damaged, or degraded recordings to achieve a high-quality, +accurate representation of the original sound. + """ + return AudioReconstruction(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def voice_cloning(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VoiceCloning: + """ + Voice cloning is a technology that uses artificial intelligence to create a +digital replica of a person's voice, allowing for the generation of speech that +mimics the tone, pitch, and speaking style of the original speaker. + """ + return VoiceCloning(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def diacritization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Diacritization: + """ + Diacritization is the process of adding diacritical marks to letters in a text +to indicate pronunciation, stress, tone, or meaning, often used in languages +such as Arabic, Hebrew, and Vietnamese to provide clarity and accuracy in +written communication. + """ + return Diacritization(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def audio_emotion_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioEmotionDetection: + """ + Audio Emotion Detection is a technology that analyzes vocal characteristics and +patterns in audio recordings to identify and classify the emotional state of +the speaker. + """ + return AudioEmotionDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def text_summarization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextSummarization: + """ + Text summarization is the process of condensing a large body of text into a +shorter version, capturing the main points and essential information while +maintaining coherence and meaning. + """ + return TextSummarization(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def entity_linking(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> EntityLinking: + """ + Entity Linking is the process of identifying and connecting mentions of +entities within a text to their corresponding entries in a structured knowledge +base, thereby enabling the disambiguation of terms and enhancing the +understanding of the text's context. 
+ """ + return EntityLinking(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def text_generation_metric(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextGenerationMetric: + """ + A Text Generation Metric is a quantitative measure used to evaluate the quality +and effectiveness of text produced by natural language processing models, often +assessing aspects such as coherence, relevance, fluency, and adherence to given +prompts or instructions. + """ + return TextGenerationMetric(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def split_on_linebreak(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SplitOnLinebreak: + """ + The "Split On Linebreak" function divides a given string into a list of +substrings, using linebreaks (newline characters) as the points of separation. + """ + return SplitOnLinebreak(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def sentiment_analysis(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SentimentAnalysis: + """ + Sentiment Analysis is a natural language processing technique used to determine +and classify the emotional tone or subjective information expressed in a piece +of text, such as identifying whether the sentiment is positive, negative, or +neutral. + """ + return SentimentAnalysis(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def keyword_spotting(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> KeywordSpotting: + """ + Keyword Spotting is a function that enables the detection and identification of +specific words or phrases within a stream of audio, often used in voice- +activated systems to trigger actions or commands based on recognized keywords. + """ + return KeywordSpotting(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def text_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextClassification: + """ + Text Classification is a natural language processing task that involves +categorizing text into predefined labels or classes based on its content, +enabling automated organization, filtering, and analysis of large volumes of +textual data. + """ + return TextClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def other__multipurpose_(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> OtherMultipurpose: + """ + The "Other (Multipurpose)" function serves as a versatile category designed to +accommodate a wide range of tasks and activities that do not fit neatly into +predefined classifications, offering flexibility and adaptability for various +needs. + """ + return OtherMultipurpose(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def speech_synthesis(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechSynthesis: + """ + Speech synthesis is the artificial production of human speech, typically +achieved through software or hardware systems that convert text into spoken +words, enabling machines to communicate verbally with users. + """ + return SpeechSynthesis(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def audio_intent_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioIntentDetection: + """ + Audio Intent Detection is a process that involves analyzing audio signals to +identify and interpret the underlying intentions or purposes behind spoken +words, enabling systems to understand and respond appropriately to human +speech. 
+ """ + return AudioIntentDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def video_label_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoLabelDetection: + """ + Video Label Detection is a function that automatically identifies and tags +various objects, scenes, activities, and other relevant elements within a +video, providing descriptive labels that enhance searchability and content +organization. + """ + return VideoLabelDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def asr_quality_estimation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AsrQualityEstimation: + """ + ASR Quality Estimation is a process that evaluates the accuracy and reliability +of automatic speech recognition systems by analyzing their performance in +transcribing spoken language into text. + """ + return AsrQualityEstimation(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def audio_transcript_analysis(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioTranscriptAnalysis: + """ + Audio Transcript Analysis is a process that involves converting spoken language +from audio recordings into written text, followed by examining and interpreting +the transcribed content to extract meaningful insights, identify patterns, and +derive actionable information. + """ + return AudioTranscriptAnalysis(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def search(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Search: + """ + The "Search" function allows users to input keywords or phrases to quickly +locate specific information, files, or content within a database, website, or +application. + """ + return Search(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def video_forced_alignment(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoForcedAlignment: + """ + Video Forced Alignment is a process that synchronizes video footage with +corresponding audio tracks by precisely aligning the visual and auditory +elements, ensuring that the movements of speakers' lips match the spoken words. + """ + return VideoForcedAlignment(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def viseme_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VisemeGeneration: + """ + Viseme Generation is the process of creating visual representations of +phonemes, which are the distinct units of sound in speech, to synchronize lip +movements with spoken words in animations or virtual avatars. + """ + return VisemeGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def topic_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TopicClassification: + """ + Topic Classification is a natural language processing function that categorizes +text into predefined topics or subjects based on its content, enabling +efficient organization and retrieval of information. + """ + return TopicClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def offensive_language_identification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> OffensiveLanguageIdentification: + """ + Offensive Language Identification is a function that analyzes text to detect +and flag language that is abusive, harmful, or inappropriate, helping to +maintain a respectful and safe communication environment. 
+ """ + return OffensiveLanguageIdentification(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def speech_translation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechTranslation: + """ + Speech Translation is a technology that converts spoken language in real-time +from one language to another, enabling seamless communication between speakers +of different languages. + """ + return SpeechTranslation(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def speaker_diarization_audio(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeakerDiarizationAudio: + """ + Speaker Diarization Audio is a process that involves segmenting an audio +recording into distinct sections, each corresponding to a different speaker, in +order to identify and differentiate between multiple speakers within the same +audio stream. + """ + return SpeakerDiarizationAudio(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def audio_transcript_improvement(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioTranscriptImprovement: + """ + Audio Transcript Improvement is a function that enhances the accuracy and +clarity of transcribed audio recordings by correcting errors, refining +language, and ensuring the text faithfully represents the original spoken +content. + """ + return AudioTranscriptImprovement(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def speech_non_speech_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechNonSpeechClassification: + """ + The function "Speech or Non-Speech Classification" is designed to analyze audio +input and determine whether the sound is human speech or non-speech noise, +enabling applications such as voice recognition systems to filter out +irrelevant background sounds. + """ + return SpeechNonSpeechClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def text_denormalization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextDenormalization: + """ + Text Denormalization is the process of converting abbreviated, contracted, or +otherwise simplified text into its full, standard form, often to improve +readability and ensure consistency in natural language processing tasks. + """ + return TextDenormalization(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def image_content_moderation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageContentModeration: + """ + Image Content Moderation is a process that involves analyzing and filtering +images to detect and manage inappropriate, harmful, or sensitive content, +ensuring compliance with community guidelines and legal standards. + """ + return ImageContentModeration(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def referenceless_text_generation_metric_default(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ReferencelessTextGenerationMetricDefault: + """ + The Referenceless Text Generation Metric Default is a function designed to +evaluate the quality of generated text without relying on reference texts for +comparison. 
+ """ + return ReferencelessTextGenerationMetricDefault(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def named_entity_recognition(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> NamedEntityRecognition: + """ + Named Entity Recognition (NER) is a natural language processing task that +involves identifying and classifying proper nouns in text into predefined +categories such as names of people, organizations, locations, dates, and other +entities. + """ + return NamedEntityRecognition(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def text_content_moderation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextContentModeration: + """ + Text Content Moderation is the process of reviewing, filtering, and managing +user-generated content to ensure it adheres to community guidelines, legal +standards, and platform policies, thereby maintaining a safe and respectful +online environment. + """ + return TextContentModeration(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def speaker_diarization_video(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeakerDiarizationVideo: + """ + The Speaker Diarization Video function identifies and segments different +speakers in a video, attributing portions of the audio to individual speakers +to facilitate analysis and understanding of multi-speaker conversations. + """ + return SpeakerDiarizationVideo(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def split_on_silence(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SplitOnSilence: + """ + The "Split On Silence" function divides an audio recording into separate +segments based on periods of silence, allowing for easier editing and analysis +of individual sections. + """ + return SplitOnSilence(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def emotion_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> EmotionDetection: + """ + Emotion Detection is a process that involves analyzing text to identify and +categorize the emotional states or sentiments expressed by individuals, such as +happiness, sadness, anger, or fear. + """ + return EmotionDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def text_spam_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextSpamDetection: + """ + Text Spam Detection is a process that involves analyzing and identifying +unsolicited or irrelevant messages within text communications, typically using +algorithms and machine learning techniques to filter out spam and ensure the +integrity of the communication platform. + """ + return TextSpamDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def translation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Translation: + """ + Translation is the process of converting text from one language into an +equivalent text in another language, preserving the original meaning and +context. + """ + return Translation(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def voice_activity_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VoiceActivityDetection: + """ + Voice Activity Detection (VAD) is a technology that identifies the presence or +absence of human speech within an audio signal, enabling systems to distinguish +between spoken words and background noise. 
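+
+        A minimal chaining sketch: since VoiceActivityDetection is a segmentor,
+        its `audio` output can feed a downstream node, mirroring the segmentor
+        wiring in the scripting test added later in this patch (asset IDs are
+        placeholders; required language parameters are omitted for brevity):
+
+            pipeline = PipelineFactory.init(name="vad-demo")
+            input = pipeline.input()
+            vad = pipeline.voice_activity_detection(asset_id="<VAD_ASSET_ID>")
+            asr = pipeline.speech_recognition(asset_id="<ASR_ASSET_ID>")
+            input.outputs.input.link(vad.inputs.audio)
+            vad.outputs.audio.link(asr.inputs.source_audio)
+            asr.use_output("data")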
+ """ + return VoiceActivityDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def speech_embedding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechEmbedding: + """ + Speech Embedding is a process that transforms spoken language into a fixed- +dimensional vector representation, capturing essential features and +characteristics of the speech for tasks such as recognition, classification, +and analysis. + """ + return SpeechEmbedding(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def subtitling_translation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SubtitlingTranslation: + """ + Subtitling Translation is the process of converting spoken dialogue from one +language into written text in another language, which is then displayed on- +screen to aid viewers in understanding the content. + """ + return SubtitlingTranslation(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def text_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextGeneration: + """ + Text Generation is a process in which artificial intelligence models, such as +neural networks, produce coherent and contextually relevant text based on a +given input or prompt, often mimicking human writing styles and patterns. + """ + return TextGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def video_understanding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoUnderstanding: + """ + Video Understanding is the process of analyzing and interpreting video content +to extract meaningful information, such as identifying objects, actions, +events, and contextual relationships within the footage. + """ + return VideoUnderstanding(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def text_to_video_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextToVideoGeneration: + """ + Text To Video Generation is a process that converts written descriptions or +scripts into dynamic, visual video content using advanced algorithms and +artificial intelligence. + """ + return TextToVideoGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def text_normalization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextNormalization: + """ + Text normalization is the process of transforming text into a standard, +consistent format by correcting spelling errors, converting all characters to a +uniform case, removing punctuation, and expanding abbreviations to improve the +text's readability and usability for further processing or analysis. + """ + return TextNormalization(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def speech_recognition(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechRecognition: + """ + Speech recognition is a technology that enables a computer or device to +identify and process spoken language, converting it into text. + """ + return SpeechRecognition(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def subtitling(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Subtitling: + """ + Subtitling is the process of displaying written text on a screen to represent +the spoken dialogue, narration, or other audio elements in a video, typically +to aid viewers who are deaf or hard of hearing, or to provide translations for +audiences who speak different languages. 
+ """ + return Subtitling(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def classification_metric(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ClassificationMetric: + """ + A Classification Metric is a quantitative measure used to evaluate the quality +and effectiveness of classification models. + """ + return ClassificationMetric(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def text_to_image_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextToImageGeneration: + """ + Text To Image Generation is a process where a system creates visual images +based on descriptive text input, translating written language into +corresponding graphical representations. + """ + return TextToImageGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) + diff --git a/pyproject.toml b/pyproject.toml index 73980717..5b0ded4b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,7 +51,8 @@ dependencies = [ "filetype>=1.2.0", "click>=7.1.2,<8.0.0", "PyYAML>=6.0.1", - "dataclasses-json>=0.5.2" + "dataclasses-json>=0.5.2", + "Jinja2==3.1.4", ] [project.urls] diff --git a/tests/functional/pipelines/data/script.py b/tests/functional/pipelines/data/script.py new file mode 100644 index 00000000..3403fa61 --- /dev/null +++ b/tests/functional/pipelines/data/script.py @@ -0,0 +1,51 @@ +__author__ = "thiagocastroferreira" + +import argparse +import json + + +def main(transcripts, speakers, output_file): + # get the speech recognition json + transcripts = json.load(open(transcripts)) + # get the speaker diarization json + speakers = json.load(open(speakers)) + + # build the response + response = [] + for i, transcript in enumerate(transcripts): + merge = { + "transcript": transcript["attributes"]["data"], + "speaker": speakers[i]["attributes"]["data"]["data"], + } + response.append( + { + "index": i, + "success": True, + "input_type": "text", + "is_url": transcript["is_url"], + "details": {}, + "input_segment_info": transcript["input_segment_info"], + "attributes": {"data": merge, "input": merge}, + } + ) + + # save the response, based on the intermediate representation format, in the output_file + with open(output_file, "w") as f: + json.dump(response, f) + + +if __name__ == "__main__": + # Create the parser + parser = argparse.ArgumentParser() + # Add arguments + parser.add_argument("--transcripts", type=str, required=True) + parser.add_argument("--speakers", type=str, required=True) + parser.add_argument("--output_file", type=str, required=True) + # Parse the argument + args = parser.parse_args() + + transcripts = args.transcripts + speakers = args.speakers + output_file = args.output_file + + main(transcripts, speakers, output_file) diff --git a/tests/functional/pipelines/designer_test.py b/tests/functional/pipelines/designer_test.py new file mode 100644 index 00000000..62f42f7e --- /dev/null +++ b/tests/functional/pipelines/designer_test.py @@ -0,0 +1,248 @@ +import pytest + +from aixplain.enums import DataType +from aixplain.factories import PipelineFactory +from aixplain.modules.pipeline.designer import ( + Link, + Operation, + Route, + RouteType, +) +from aixplain.modules import Pipeline +from aixplain.modules.pipeline.designer import AssetNode +from uuid import uuid4 + + +@pytest.fixture +def pipeline(): + # Setup: Initialize the pipeline + pipeline = PipelineFactory.init( + name=str(uuid4()), + ) + + # Yield control back to the test function + yield pipeline + + # Teardown: Ensure the pipeline is deleted + if pipeline is not None: + 
pipeline.delete() + + +def test_create_asr_pipeline(pipeline): + # add nodes to the pipeline + input = pipeline.input() + model1 = AssetNode(asset_id="60ddefab8d38c51c5885ee38") + pipeline.add_node(model1) + + model2 = AssetNode(asset_id="60ddefd68d38c51c588608f1") + pipeline.add_node(model2) + + # link the nodes + link1 = Link( + from_node=input, + to_node=model1, + from_param="input", + to_param="source_audio", + ) + pipeline.add_link(link1) + + link2 = Link( + from_node=model1, + to_node=model2, + from_param="data", + to_param="text", + ) + pipeline.add_link(link2) + + # use the output of the last node + model1.use_output("data") + model2.use_output("data") + + # save the pipeline as draft + pipeline.save() + + assert isinstance(pipeline, Pipeline) + assert pipeline.id != "" + + +def test_create_mt_pipeline_and_run(pipeline): + # add nodes to the pipeline + input = pipeline.input() + model1 = pipeline.translation(asset_id="60ddef828d38c51c5885d491") + output = pipeline.output() + + # link the nodes + input.link( + to_node=model1, + from_param=input.outputs.input, + to_param=model1.inputs.text, + ) + + # use the output of the last node + model1.link( + to_node=output, + from_param=model1.outputs.data, + to_param=output.inputs.output, + ) + + # save the pipeline as an asset + pipeline.save(save_as_asset=True) + + assert isinstance(pipeline, Pipeline) + assert pipeline.id != "" + + pipeline = PipelineFactory.get(pipeline.id) + + # run the pipeline + output = pipeline.run( + "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.txt", + **{"batchmode": False, "version": "2.0"}, + ) + assert output["status"] == "SUCCESS" + + +def test_routing_pipeline(pipeline): + + TRANSLATION_ASSET = "60ddefae8d38c51c5885eff7" + SPEECH_RECOGNITION_ASSET = "621cf3fa6442ef511d2830af" + + input = pipeline.input() + translation = pipeline.asset(TRANSLATION_ASSET) + speech_recognition = pipeline.asset(SPEECH_RECOGNITION_ASSET) + + input.route( + translation.inputs.text, speech_recognition.inputs.source_audio + ) + + translation.use_output("data") + speech_recognition.use_output("data") + + pipeline.save() + + output = pipeline.run("This is a sample text!") + + assert output["status"] == "SUCCESS" + assert output.get("data") is not None + assert len(output["data"]) > 0 + assert output["data"][0].get("segments") is not None + assert len(output["data"][0]["segments"]) > 0 + + +def test_scripting_pipeline(pipeline): + + SPEAKER_DIARIZATION_AUDIO_ASSET = "62fab6ecb39cca09ca5bc365" + SPEECH_RECOGNITION_ASSET = "621cf3fa6442ef511d2830af" + + input = pipeline.input() + + segmentor = pipeline.speaker_diarization_audio( + asset_id=SPEAKER_DIARIZATION_AUDIO_ASSET + ) + + speech_recognition = pipeline.speech_recognition( + asset_id=SPEECH_RECOGNITION_ASSET + ) + + script = pipeline.script( + script_path="tests/functional/pipelines/data/script.py" + ) + script.inputs.create_param(code="transcripts", data_type=DataType.TEXT) + script.inputs.create_param(code="speakers", data_type=DataType.LABEL) + script.outputs.create_param(code="data", data_type=DataType.TEXT) + + input.outputs.input.link(segmentor.inputs.audio) + segmentor.outputs.audio.link(speech_recognition.inputs.source_audio) + segmentor.outputs.data.link(script.inputs.speakers) + speech_recognition.outputs.data.link(script.inputs.transcripts) + + script.use_output("data") + + pipeline.save() + + output = pipeline.run( + "s3://aixplain-platform-assets/samples/en/CPAC1x2.wav", + version="2.0", + ) + + assert output["status"] == "SUCCESS" + assert 
output.get("data") is not None + assert len(output["data"]) > 0 + assert output["data"][0].get("segments") is not None + assert len(output["data"][0]["segments"]) > 0 + + +def test_decision_pipeline(pipeline): + + SENTIMENT_ANALYSIS_ASSET = "6172874f720b09325cbcdc33" + + input = pipeline.input() + + sentiment_analysis = pipeline.sentiment_analysis( + asset_id=SENTIMENT_ANALYSIS_ASSET + ) + + positive_output = pipeline.output() + negative_output = pipeline.output() + decision_node = pipeline.decision( + routes=[ + Route( + type=RouteType.CHECK_VALUE, + operation=Operation.EQUAL, + value="POSITIVE", + path=[positive_output], + ), + Route( + type=RouteType.CHECK_VALUE, + operation=Operation.DIFFERENT, + value="POSITIVE", + path=[negative_output], + ), + ] + ) + + input.outputs.input.link(sentiment_analysis.inputs.text) + sentiment_analysis.outputs.data.link(decision_node.inputs.comparison) + input.outputs.input.link(decision_node.inputs.passthrough) + decision_node.outputs.input.link(positive_output.inputs.output) + decision_node.outputs.input.link(negative_output.inputs.output) + + pipeline.save() + + output = pipeline.run("I feel so bad today!") + + assert output["status"] == "SUCCESS" + assert output.get("data") is not None + assert len(output["data"]) > 0 + assert output["data"][0].get("segments") is not None + assert len(output["data"][0]["segments"]) > 0 + + +def test_reconstructing_pipeline(pipeline): + input = pipeline.input() + + segmentor = pipeline.speaker_diarization_audio( + asset_id="62fab6ecb39cca09ca5bc365" + ) + + speech_recognition = pipeline.speech_recognition( + asset_id="60ddefab8d38c51c5885ee38" + ) + + reconstructor = pipeline.bare_reconstructor() + + input.outputs.input.link(segmentor.inputs.audio) + segmentor.outputs.audio.link(speech_recognition.inputs.source_audio) + speech_recognition.outputs.data.link(reconstructor.inputs.data) + + reconstructor.use_output("data") + + pipeline.save() + + output = pipeline.run( + "s3://aixplain-platform-assets/samples/en/CPAC1x2.wav", + ) + assert output["status"] == "SUCCESS" + assert output.get("data") is not None + assert len(output["data"]) > 0 + assert output["data"][0].get("segments") is not None + assert len(output["data"][0]["segments"]) > 0 diff --git a/tests/unit/designer_test.py b/tests/unit/designer_test.py new file mode 100644 index 00000000..766c7e54 --- /dev/null +++ b/tests/unit/designer_test.py @@ -0,0 +1,707 @@ +import pytest +import unittest.mock as mock + + +from aixplain.enums import DataType +from aixplain.modules.pipeline.designer.base import ( + Node, + Link, + Param, + ParamProxy, + Inputs, + Outputs, + InputParam, + OutputParam, +) + +from aixplain.modules.pipeline.designer.enums import ( + ParamType, + NodeType, +) + +from aixplain.modules.pipeline.designer.mixins import LinkableMixin +from aixplain.modules.pipeline.designer.pipeline import DesignerPipeline + + +def test_create_node(): + + pipeline = DesignerPipeline() + + class BareNode(Node): + pass + + with mock.patch( + "aixplain.modules.pipeline.designer.Node.attach_to" + ) as mock_attach_to: + node = BareNode(number=3, label="FOO") + mock_attach_to.assert_not_called() + assert isinstance(node.inputs, Inputs) + assert isinstance(node.outputs, Outputs) + assert node.number == 3 + assert node.label == "FOO" + + class FooNodeInputs(Inputs): + pass + + class FooNodeOutputs(Outputs): + pass + + class FooNode(Node[FooNodeInputs, FooNodeOutputs]): + inputs_class = FooNodeInputs + outputs_class = FooNodeOutputs + + with mock.patch( + 
"aixplain.modules.pipeline.designer.Node.attach_to" + ) as mock_attach_to: + node = FooNode(pipeline=pipeline, number=3, label="FOO") + mock_attach_to.assert_called_once_with(pipeline) + assert isinstance(node.inputs, FooNodeInputs) + assert isinstance(node.outputs, FooNodeOutputs) + assert node.number == 3 + assert node.label == "FOO" + + +def test_node_attach_to(): + + pipeline = DesignerPipeline() + + class BareNode(Node): + pass + + node = BareNode() + with pytest.raises(AssertionError) as excinfo: + node.attach_to(pipeline) + + assert "Node type not set" in str(excinfo.value) + + class AssetNode(Node): + type: NodeType = NodeType.ASSET + + a = AssetNode() + b = AssetNode() + c = AssetNode() + d = AssetNode(number=8) + e = AssetNode(number=8) + + a.attach_to(pipeline) + b.attach_to(pipeline) + assert b.pipeline is pipeline + assert b.number == 1 + assert b.label == "ASSET(ID=1)" + assert b in pipeline.nodes + assert len(pipeline.nodes) == 2 + + c.attach_to(pipeline) + assert c.pipeline is pipeline + assert c.number == 2 + assert c.label == "ASSET(ID=2)" + assert c in pipeline.nodes + assert len(pipeline.nodes) == 3 + + d.attach_to(pipeline) + assert d.pipeline is pipeline + assert d.number == 8 + assert d.label == "ASSET(ID=8)" + assert d in pipeline.nodes + assert len(pipeline.nodes) == 4 + + with pytest.raises(AssertionError) as excinfo: + e.attach_to(pipeline) + + assert "Node number already exists" in str(excinfo.value) + + +def test_node_serialize(): + pipeline = DesignerPipeline() + + class AssetNode(Node): + type: NodeType = NodeType.ASSET + + node = AssetNode() + + with mock.patch.object(node.inputs, "serialize") as mock_inputs_serialize: + with mock.patch.object( + node.outputs, "serialize" + ) as mock_outputs_serialize: + assert node.serialize() == { + "number": node.number, + "type": NodeType.ASSET, + "inputValues": mock_inputs_serialize.return_value, + "outputValues": mock_outputs_serialize.return_value, + "label": node.label, + } + node.attach_to(pipeline) + mock_inputs_serialize.assert_called_once() + mock_outputs_serialize.assert_called_once() + mock_inputs_serialize.reset_mock() + mock_outputs_serialize.reset_mock() + + assert node.serialize() == { + "number": node.number, + "type": NodeType.ASSET, + "inputValues": mock_inputs_serialize.return_value, + "outputValues": mock_outputs_serialize.return_value, + "label": node.label, + } + mock_inputs_serialize.assert_called_once() + mock_outputs_serialize.assert_called_once() + + +def test_create_param(): + + class TypedParam(Param): + param_type = ParamType.INPUT + + with mock.patch( + "aixplain.modules.pipeline.designer.Param.attach_to" + ) as mock_attach_to: + param = TypedParam( + code="param", + data_type=DataType.TEXT, + value="foo", + ) + mock_attach_to.assert_not_called() + + assert param.code == "param" + assert param.data_type == DataType.TEXT + assert param.value == "foo" + assert param.param_type == ParamType.INPUT + + with mock.patch( + "aixplain.modules.pipeline.designer.Param.attach_to" + ) as mock_attach_to: + param = TypedParam( + code="param", + data_type=DataType.TEXT, + value="foo", + param_type=ParamType.OUTPUT, + ) + mock_attach_to.assert_not_called() + + assert param.code == "param" + assert param.data_type == DataType.TEXT + assert param.value == "foo" + assert param.param_type == ParamType.INPUT + + class UnTypedParam(Param): + pass + + with mock.patch( + "aixplain.modules.pipeline.designer.Param.attach_to" + ) as mock_attach_to: + param = UnTypedParam( + code="param", + data_type=DataType.TEXT, + 
value="foo", + param_type=ParamType.OUTPUT, + ) + mock_attach_to.assert_not_called() + + assert param.param_type == ParamType.OUTPUT + + with mock.patch( + "aixplain.modules.pipeline.designer.Param.attach_to" + ) as mock_attach_to: + param = UnTypedParam( + code="param", + data_type=DataType.TEXT, + value="foo", + param_type=ParamType.INPUT, + ) + mock_attach_to.assert_not_called() + + assert param.param_type == ParamType.INPUT + + class AssetNode(Node): + type: NodeType = NodeType.ASSET + + node = AssetNode() + + with mock.patch( + "aixplain.modules.pipeline.designer.Param.attach_to" + ) as mock_attach_to: + param = UnTypedParam( + code="param", + data_type=DataType.TEXT, + value="foo", + param_type=ParamType.INPUT, + node=node, + ) + mock_attach_to.assert_called_once_with(node) + + +@pytest.mark.parametrize( + "param_cls, expected_param_type", + [ + (InputParam, ParamType.INPUT), + (OutputParam, ParamType.OUTPUT), + ], +) +def test_create_input_output_param(param_cls, expected_param_type): + class AssetNode(Node): + type: NodeType = NodeType.ASSET + + node = AssetNode() + + with mock.patch( + "aixplain.modules.pipeline.designer.Param.attach_to" + ) as mock_attach_to: + param = param_cls( + code="param", data_type=DataType.TEXT, value="foo", node=node + ) + mock_attach_to.assert_called_once_with(node) + assert param.code == "param" + assert param.data_type == DataType.TEXT + assert param.value == "foo" + assert param.param_type == expected_param_type + assert not param.node + + +def test_param_attach_to(): + class AssetNode(Node): + type: NodeType = NodeType.ASSET + + node = AssetNode() + + class NoTypeParam(Param): + pass + + param = NoTypeParam(code="param", data_type=DataType.TEXT, value="foo") + with pytest.raises(AssertionError) as excinfo: + param.attach_to(node) + + assert "Param type not set" in str(excinfo.value) + + input = InputParam(code="input", data_type=DataType.TEXT, value="foo") + + with mock.patch.object(node.inputs, "add_param") as mock_add_param: + input.attach_to(node) + mock_add_param.assert_called_once_with(input) + assert input.node is node + + with pytest.raises(AssertionError) as excinfo: + input.attach_to(node) + + assert "Param already attached to a node" in str(excinfo.value) + + output = OutputParam(code="output", data_type=DataType.TEXT, value="bar") + + with mock.patch.object(node.outputs, "add_param") as mock_add_param: + output.attach_to(node) + mock_add_param.assert_called_once_with(output) + assert output.node is node + + +def test_param_link(): + input = InputParam(code="input", data_type=DataType.TEXT, value="foo") + output = OutputParam(code="output", data_type=DataType.TEXT, value="bar") + + with pytest.raises(AssertionError) as excinfo: + output.link(input) + + assert "Param not attached to a node" in str(excinfo.value) + + class AssetNode(Node, LinkableMixin): + type: NodeType = NodeType.ASSET + + a = AssetNode() + b = AssetNode() + + output = OutputParam(code="output", data_type=DataType.TEXT, value="bar") + output.node = a + input = InputParam(code="input", data_type=DataType.TEXT, value="foo") + input.node = b + + with pytest.raises(AssertionError) as excinfo: + input.link(output) + + assert "Invalid param type" in str(excinfo.value) + + with pytest.raises(AssertionError) as excinfo: + output.link(input) + + assert "Param not registered as output" in str(excinfo.value) + + output = OutputParam( + code="output", data_type=DataType.TEXT, value="bar", node=a + ) + input = InputParam( + code="input", data_type=DataType.TEXT, value="foo", node=b + ) 
+ + with mock.patch.object(input, "back_link") as mock_back_link: + output.link(input) + mock_back_link.assert_called_once_with(output) + + +def test_param_back_link(): + input = InputParam(code="input", data_type=DataType.TEXT, value="foo") + output = OutputParam(code="output", data_type=DataType.TEXT, value="bar") + + with pytest.raises(AssertionError) as excinfo: + input.back_link(output) + + assert "Param not attached to a node" in str(excinfo.value) + + class AssetNode(Node, LinkableMixin): + type: NodeType = NodeType.ASSET + + a = AssetNode() + b = AssetNode() + + output = OutputParam(code="output", data_type=DataType.TEXT, value="bar") + output.node = a + input = InputParam(code="input", data_type=DataType.TEXT, value="foo") + input.node = b + + with pytest.raises(AssertionError) as excinfo: + output.back_link(input) + + assert "Invalid param type" in str(excinfo.value) + + with pytest.raises(AssertionError) as excinfo: + input.back_link(output) + + assert "Param not registered as input" in str(excinfo.value) + + output = OutputParam( + code="output", data_type=DataType.TEXT, value="bar", node=a + ) + input = InputParam( + code="input", data_type=DataType.TEXT, value="foo", node=b + ) + + with mock.patch.object(a, "link") as mock_link: + input.back_link(output) + mock_link.assert_called_once_with(b, output, input) + + +def test_create_pipeline(): + pipeline = DesignerPipeline() + + assert pipeline.nodes == [] + assert pipeline.links == [] + assert not pipeline.instance + + +def test_link_create(): + class AssetNode(Node, LinkableMixin): + type: NodeType = NodeType.ASSET + + a = AssetNode() + b = AssetNode() + + with pytest.raises(AssertionError) as excinfo: + link = Link( + from_node=a, + to_node=b, + from_param="output", + to_param="input", + ) + + assert "Invalid from param" in str(excinfo.value) + + a.outputs.create_param("output", DataType.TEXT, "foo") + + with pytest.raises(AssertionError) as excinfo: + link = Link( + from_node=a, + to_node=b, + from_param="output", + to_param="input", + ) + + assert "Invalid to param" in str(excinfo.value) + + b.inputs.create_param("input", DataType.TEXT, "bar") + + link = Link( + from_node=a, + to_node=b, + from_param="output", + to_param="input", + ) + + assert link.from_node == a + assert link.to_node == b + assert link.from_param == "output" + assert link.to_param == "input" + + pipeline = DesignerPipeline() + + with mock.patch( + "aixplain.modules.pipeline.designer.Link.attach_to" + ) as mock_attach_to: + link = Link( + from_node=a, + to_node=b, + from_param="output", + to_param="input", + pipeline=pipeline, + ) + mock_attach_to.assert_called_once_with(pipeline) + + +def test_link_attach_to(): + + pipeline = DesignerPipeline() + + class AssetNode(Node, LinkableMixin): + type: NodeType = NodeType.ASSET + + a = AssetNode() + b = AssetNode() + + a.outputs.create_param("output", DataType.TEXT, "foo") + b.inputs.create_param("input", DataType.TEXT, "bar") + + link = Link( + from_node=a, + to_node=b, + from_param="output", + to_param="input", + ) + + with mock.patch.object(a, "attach_to") as mock_a_attach_to: + with mock.patch.object(b, "attach_to") as mock_b_attach_to: + link.attach_to(pipeline) + mock_a_attach_to.assert_called_once_with(pipeline) + mock_b_attach_to.assert_called_once_with(pipeline) + assert link.pipeline is pipeline + assert link in pipeline.links + + a = AssetNode(pipeline=pipeline) + b = AssetNode(pipeline=pipeline) + a.outputs.create_param("output", DataType.TEXT, "foo") + b.inputs.create_param("input", DataType.TEXT, 
"bar") + + link = Link( + from_node=a, + to_node=b, + from_param="output", + to_param="input", + ) + + with mock.patch.object(a, "attach_to") as mock_a_attach_to: + with mock.patch.object(b, "attach_to") as mock_b_attach_to: + link.attach_to(pipeline) + mock_a_attach_to.assert_not_called() + mock_b_attach_to.assert_not_called() + assert link.pipeline is pipeline + assert link in pipeline.links + + with pytest.raises(AssertionError) as excinfo: + link.attach_to(pipeline) + + assert "Link already attached to a pipeline" in str(excinfo.value) + + +def test_link_serialize(): + pipeline = DesignerPipeline() + + class AssetNode(Node, LinkableMixin): + type: NodeType = NodeType.ASSET + + a = AssetNode() + b = AssetNode() + a.outputs.create_param("output", DataType.TEXT, "foo") + b.inputs.create_param("input", DataType.TEXT, "bar") + + link = Link( + from_node=a, + to_node=b, + from_param="output", + to_param="input", + ) + + with pytest.raises(AssertionError) as excinfo: + link.serialize() + + assert "From node number not set" in str(excinfo.value) + a.attach_to(pipeline) + + with pytest.raises(AssertionError) as excinfo: + link.serialize() + + assert "To node number not set" in str(excinfo.value) + b.attach_to(pipeline) + + link = Link( + from_node=a, + to_node=b, + from_param="output", + to_param="input", + ) + + assert link.serialize() == { + "from": a.number, + "to": b.number, + "paramMapping": [ + {"from": "output", "to": "input"}, + ], + } + + +def test_create_param_proxy(): + class AssetNode(Node): + type: NodeType = NodeType.ASSET + + node = AssetNode() + + param_proxy = ParamProxy(node) + assert param_proxy.node is node + assert param_proxy._params == [] + + +def test_param_proxy_add_param(): + class AssetNode(Node): + type: NodeType = NodeType.ASSET + + node = AssetNode() + + param_proxy = ParamProxy(node) + + class FooParam(Param): + pass + + param = FooParam(code="foo", data_type=DataType.TEXT) + param_proxy.add_param(param) + assert param in param_proxy._params + assert hasattr(param_proxy, "foo") + assert param_proxy.foo is param + assert param_proxy.foo.code == "foo" + assert param_proxy.foo.data_type == DataType.TEXT + + with pytest.raises(ValueError) as excinfo: + param_proxy.add_param(param) + + assert "Parameter with code 'foo' already exists." 
in str(excinfo.value) + + +def test_param_proxy_create_param(): + class AssetNode(Node): + type: NodeType = NodeType.ASSET + + node = AssetNode() + + param_proxy = ParamProxy(node) + + with mock.patch.object(param_proxy, "_create_param") as mock_create_param: + with mock.patch.object(param_proxy, "add_param") as mock_add_param: + param = param_proxy.create_param( + "foo", DataType.TEXT, "bar", is_required=True + ) + mock_create_param.assert_called_once_with( + "foo", DataType.TEXT, "bar" + ) + mock_add_param.assert_called_once_with(param) + assert param.is_required is True + + +def test_param_proxy_attr_access(): + class AssetNode(Node): + type: NodeType = NodeType.ASSET + + node = AssetNode() + + param_proxy = ParamProxy(node) + + class FooParam(Param): + pass + + param = FooParam(code="foo", data_type=DataType.TEXT) + param_proxy.add_param(param) + + assert param in param_proxy + assert "foo" in param_proxy + assert param_proxy["foo"] is param + assert param_proxy.foo is param + + with pytest.raises(KeyError) as excinfo: + param_proxy["bar"] + + assert "'bar'" in str(excinfo.value) + + +def test_node_link(): + + class AssetNode(Node, LinkableMixin): + type: NodeType = NodeType.ASSET + + a = AssetNode() + b = AssetNode() + + output = OutputParam( + code="output", data_type=DataType.TEXT, value="bar", node=a + ) + input = InputParam( + code="input", data_type=DataType.TEXT, value="foo", node=b + ) + + # here too lazy to mock Link class properly + # checking the output instance instead + link = a.link(b, from_param=output, to_param=input) + assert isinstance(link, Link) + assert link.from_node == a + assert link.to_node == b + assert link.from_param == "output" + assert link.to_param == "input" + + +def test_pipeline_add_node(): + pipeline = DesignerPipeline() + + class InputNode(Node): + type: NodeType = NodeType.INPUT + + node = InputNode() + pipeline.add_node(node) + assert pipeline.nodes == [node] + assert pipeline.links == [] + + class AssetNode(Node): + type: NodeType = NodeType.ASSET + + node1 = AssetNode() + with mock.patch.object(node1, "attach_to") as mock_attach_to: + pipeline.add_node(node1) + mock_attach_to.assert_called_once_with(pipeline) + + +def test_pipeline_add_nodes(): + pipeline = DesignerPipeline() + + class InputNode(Node): + type: NodeType = NodeType.INPUT + + node = InputNode() + + with mock.patch.object(pipeline, "add_node") as mock_add_node: + pipeline.add_nodes(node) + assert mock_add_node.call_count == 1 + + node1 = InputNode() + node2 = InputNode() + + with mock.patch.object(pipeline, "add_node") as mock_add_node: + pipeline.add_nodes(node1, node2) + assert mock_add_node.call_count == 2 + + +def test_pipeline_add_link(): + pipeline = DesignerPipeline() + + class AssetNode(Node): + type: NodeType = NodeType.ASSET + + a = AssetNode() + a.outputs.create_param("output", DataType.TEXT) + b = AssetNode() + b.inputs.create_param("input", DataType.TEXT) + + link = Link(from_node=a, to_node=b, from_param="output", to_param="input") + pipeline.add_link(link) + + with mock.patch.object(link, "attach_to") as mock_attach_to: + pipeline.add_link(link) + mock_attach_to.assert_called_once_with(pipeline) From ef16dd5ff85ebc23d64bbc3c9674ef9a649c68bb Mon Sep 17 00:00:00 2001 From: mikelam-us-aixplain <131073216+mikelam-us-aixplain@users.noreply.github.com> Date: Mon, 12 Aug 2024 10:31:34 -0700 Subject: [PATCH 019/105] Updated image upload tests (#213) * Updated image upload tests Signed-off-by: root * Added back Hugging Face onboarding Signed-off-by: Michael Lam --------- 
Signed-off-by: root Signed-off-by: Michael Lam Co-authored-by: root --- aixplain/factories/model_factory.py | 2 +- tests/functional/model/hf_onboarding_test.py | 14 ++++++------- .../model}/image_upload_e2e_test.py | 21 ++++++++----------- .../model}/image_upload_functional_test.py | 18 +++++++--------- tests/test_requests/create_asset_request.json | 9 ++++---- tests/{ => unit}/image_upload_test.py | 16 +++++++------- 6 files changed, 36 insertions(+), 44 deletions(-) rename tests/{ => functional/model}/image_upload_e2e_test.py (72%) rename tests/{ => functional/model}/image_upload_functional_test.py (81%) rename tests/{ => unit}/image_upload_test.py (85%) diff --git a/aixplain/factories/model_factory.py b/aixplain/factories/model_factory.py index 0fb845f1..c11d837a 100644 --- a/aixplain/factories/model_factory.py +++ b/aixplain/factories/model_factory.py @@ -361,7 +361,7 @@ def create_asset_repo( if function_dict["name"] == function: function_id = function_dict["id"] if function_id is None: - raise Exception("Invalid function name") + raise Exception(f"Invalid function name {function}") create_url = urljoin(config.BACKEND_URL, f"sdk/models/onboard") logging.debug(f"URL: {create_url}") if api_key: diff --git a/tests/functional/model/hf_onboarding_test.py b/tests/functional/model/hf_onboarding_test.py index 47a38361..fa68d2e8 100644 --- a/tests/functional/model/hf_onboarding_test.py +++ b/tests/functional/model/hf_onboarding_test.py @@ -13,7 +13,7 @@ def test_deploy_model(): # Start the deployment model_name = "Test Model" repo_id = "tiiuae/falcon-7b" - response = ModelFactory.deploy_huggingface_model(model_name, repo_id, config.HF_TOKEN) + response = ModelFactory.deploy_huggingface_model(model_name, repo_id, hf_token=config.HF_TOKEN) assert "id" in response.keys() # Check for status @@ -30,31 +30,31 @@ def test_deploy_model(): delete_asset(model_id, config.TEAM_API_KEY) -@pytest.mark.skip(reason="Model Deployment is deactivated for improvements.") +# @pytest.mark.skip(reason="Model Deployment is deactivated for improvements.") def test_nonexistent_model(): # Start the deployment model_name = "Test Model" repo_id = "nonexistent-supplier/nonexistent-model" - response = ModelFactory.deploy_huggingface_model(model_name, repo_id, config.HF_TOKEN) + response = ModelFactory.deploy_huggingface_model(model_name, repo_id, hf_token=config.HF_TOKEN) assert response["statusCode"] == 400 assert response["message"] == "err.unable_to_onboard_model" -@pytest.mark.skip(reason="Model Deployment is deactivated for improvements.") +# @pytest.mark.skip(reason="Model Deployment is deactivated for improvements.") def test_size_limit(): # Start the deployment model_name = "Test Model" repo_id = "tiiuae/falcon-40b" - response = ModelFactory.deploy_huggingface_model(model_name, repo_id, config.HF_TOKEN) + response = ModelFactory.deploy_huggingface_model(model_name, repo_id, hf_token=config.HF_TOKEN) assert response["statusCode"] == 400 assert response["message"] == "err.unable_to_onboard_model" -@pytest.mark.skip(reason="Model Deployment is deactivated for improvements.") +# @pytest.mark.skip(reason="Model Deployment is deactivated for improvements.") def test_gated_model(): # Start the deployment model_name = "Test Model" repo_id = "meta-llama/Llama-2-7b-hf" - response = ModelFactory.deploy_huggingface_model(model_name, repo_id, "mock_key") + response = ModelFactory.deploy_huggingface_model(model_name, repo_id, hf_token="mock_key") assert response["statusCode"] == 400 assert response["message"] == 
"err.unable_to_onboard_model" diff --git a/tests/image_upload_e2e_test.py b/tests/functional/model/image_upload_e2e_test.py similarity index 72% rename from tests/image_upload_e2e_test.py rename to tests/functional/model/image_upload_e2e_test.py index 0e2ccbc5..7c7efbcc 100644 --- a/tests/image_upload_e2e_test.py +++ b/tests/functional/model/image_upload_e2e_test.py @@ -9,7 +9,6 @@ import pytest -@pytest.mark.skip(reason="Model Upload is deactivated for improvements.") def test_create_and_upload_model(): # List the host machines host_response = ModelFactory.list_host_machines() @@ -31,14 +30,15 @@ def test_create_and_upload_model(): # Register the model, and create an image repository for it. with open(Path("tests/test_requests/create_asset_request.json")) as f: - register_payload = json.load(f) - name = register_payload["name"] - host_machine = register_payload["hostingMachine"] - version = register_payload["version"] - description = register_payload["description"] - function = register_payload["function"] - source_language = register_payload["sourceLanguage"] - register_response = ModelFactory.create_asset_repo(name, host_machine, version, description, function, source_language) + mock_register_payload = json.load(f) + name = mock_register_payload["name"] + description = mock_register_payload["description"] + function = mock_register_payload["function"] + source_language = mock_register_payload["sourceLanguage"] + input_modality = mock_register_payload["input_modality"] + output_modality = mock_register_payload["output_modality"] + documentation_url = mock_register_payload["documentation_url"] + register_response = ModelFactory.create_asset_repo(name, description, function, source_language, input_modality, output_modality, documentation_url, config.TEAM_API_KEY) assert "id" in register_response.keys() assert "repositoryName" in register_response.keys() model_id = register_response["id"] @@ -56,10 +56,7 @@ def test_create_and_upload_model(): registry = login_response["registry"] # Push an image to ECR - # os.system("aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 535945872701.dkr.ecr.us-east-1.amazonaws.com") low_level_client = docker.APIClient(base_url="unix://var/run/docker.sock") - # low_level_client.pull("535945872701.dkr.ecr.us-east-1.amazonaws.com/bash") - # low_level_client.tag("535945872701.dkr.ecr.us-east-1.amazonaws.com/bash", f"{registry}/{repo_name}") low_level_client.pull("bash") low_level_client.tag("bash", f"{registry}/{repo_name}") low_level_client.push(f"{registry}/{repo_name}", auth_config={"username": username, "password": password}) diff --git a/tests/image_upload_functional_test.py b/tests/functional/model/image_upload_functional_test.py similarity index 81% rename from tests/image_upload_functional_test.py rename to tests/functional/model/image_upload_functional_test.py index b9dd3ebf..60d1d3f0 100644 --- a/tests/image_upload_functional_test.py +++ b/tests/functional/model/image_upload_functional_test.py @@ -1,13 +1,12 @@ __author__ = "michaellam" from pathlib import Path import json -from aixplain.utils import config -from tests.test_utils import delete_asset, delete_service_account from aixplain.factories.model_factory import ModelFactory +from tests.test_utils import delete_asset, delete_service_account +from aixplain.utils import config +import docker import pytest - -@pytest.mark.skip(reason="Model Upload is deactivated for improvements.") def test_login(): response = ModelFactory.asset_repo_login() assert 
response["username"] == "AWS" @@ -18,18 +17,17 @@ def test_login(): delete_service_account(config.TEAM_API_KEY) -@pytest.mark.skip(reason="Model Upload is deactivated for improvements.") def test_create_asset_repo(): with open(Path("tests/test_requests/create_asset_request.json")) as f: mock_register_payload = json.load(f) name = mock_register_payload["name"] - host_machine = mock_register_payload["hostingMachine"] - version = mock_register_payload["version"] description = mock_register_payload["description"] function = mock_register_payload["function"] source_language = mock_register_payload["sourceLanguage"] - response = ModelFactory.create_asset_repo(name, host_machine, version, description, function, source_language) - print(response) + input_modality = mock_register_payload["input_modality"] + output_modality = mock_register_payload["output_modality"] + documentation_url = mock_register_payload["documentation_url"] + response = ModelFactory.create_asset_repo(name, description, function, source_language, input_modality, output_modality, documentation_url, config.TEAM_API_KEY) response_dict = dict(response) assert "id" in response_dict.keys() assert "repositoryName" in response_dict.keys() @@ -38,7 +36,6 @@ def test_create_asset_repo(): delete_asset(response["id"], config.TEAM_API_KEY) -@pytest.mark.skip(reason="Model Upload is deactivated for improvements.") def test_list_host_machines(): response = ModelFactory.list_host_machines() for hosting_machine_dict in response: @@ -49,7 +46,6 @@ def test_list_host_machines(): assert "hourlyCost" in hosting_machine_dict.keys() -@pytest.mark.skip(reason="Model Upload is deactivated for improvements.") def test_get_functions(): # Verbose response = ModelFactory.list_functions(True) diff --git a/tests/test_requests/create_asset_request.json b/tests/test_requests/create_asset_request.json index 4683e526..688dd33a 100644 --- a/tests/test_requests/create_asset_request.json +++ b/tests/test_requests/create_asset_request.json @@ -1,8 +1,9 @@ { "name": "mock_name", - "hostingMachine": "aix-2c-8g-od", - "version": "mock_version", "description": "mock_description", - "function": "Speech Recognition", - "sourceLanguage": "en" + "function": "Text Generation", + "sourceLanguage": "en", + "input_modality": "text", + "output_modality": "text", + "documentation_url": "" } \ No newline at end of file diff --git a/tests/image_upload_test.py b/tests/unit/image_upload_test.py similarity index 85% rename from tests/image_upload_test.py rename to tests/unit/image_upload_test.py index fb919171..4b192292 100644 --- a/tests/image_upload_test.py +++ b/tests/unit/image_upload_test.py @@ -13,7 +13,6 @@ API_FIXED_HEADER = {"x-api-key": f"{config.TEAM_API_KEY}", "Content-Type": "application/json"} -@pytest.mark.skip(reason="Model Upload is deactivated for improvements.") def test_login(): url = urljoin(config.BACKEND_URL, f"sdk/ecr/login") with requests_mock.Mocker() as mock: @@ -24,24 +23,26 @@ def test_login(): assert creds == mock_json -@pytest.mark.skip(reason="Model Upload is deactivated for improvements.") def test_create_asset_repo(): - url_register = urljoin(config.BACKEND_URL, f"sdk/models/register") + url_register = urljoin(config.BACKEND_URL, f"sdk/models/onboard") url_function = urljoin(config.BACKEND_URL, f"sdk/functions") + print(f"URL_Register {url_register}") with requests_mock.Mocker() as mock: with open(Path("tests/mock_responses/create_asset_repo_response.json")) as f: mock_json_register = json.load(f) - mock.post(url_register, headers=API_FIXED_HEADER, 
json=mock_json_register)
+        mock.post(url_register, headers=API_FIXED_HEADER, json=mock_json_register, status_code=201)
+
         with open(Path("tests/mock_responses/list_functions_response.json")) as f:
             mock_json_functions = json.load(f)
             mock.get(url_function, headers=AUTH_FIXED_HEADER, json=mock_json_functions)
+
         model_id = ModelFactory.create_asset_repo(
-            "mock_name", "mock_machines", "mock_version", "mock_description", "Speech Recognition", "en", config.TEAM_API_KEY
+            "mock_name", "mock_description", "Text Generation", "en", "text", "text", api_key=config.TEAM_API_KEY
         )
+        # print(f"Model ID {model_id}")
         assert model_id == mock_json_register
 
 
-@pytest.mark.skip(reason="Model Upload is deactivated for improvements.")
 def test_list_host_machines():
     url = urljoin(config.BACKEND_URL, f"sdk/hosting-machines")
     with requests_mock.Mocker() as mock:
@@ -55,8 +56,6 @@ def test_list_host_machines():
         for key in machine_dict.keys():
             assert machine_dict[key] == mock_json_dict[key]
 
-
-@pytest.mark.skip(reason="Model Upload is deactivated for improvements.")
 def test_get_functions():
     url = urljoin(config.BACKEND_URL, f"sdk/functions")
     with requests_mock.Mocker() as mock:
@@ -66,7 +65,6 @@ def test_get_functions():
         functions = ModelFactory.list_functions(config.TEAM_API_KEY)
         assert functions == mock_json
 
-
 @pytest.mark.skip(reason="Not currently supported.")
 def test_list_image_repo_tags():
     model_id = "mock_id"

From d0ad51d2990d291f80cb088cd69ed1d6412d39d3 Mon Sep 17 00:00:00 2001
From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com>
Date: Tue, 13 Aug 2024 15:03:24 -0300
Subject: [PATCH 020/105] Eng 217 local path (#220)

* Content inputs to be processed according to the query.

* Add data and query parameters on running agent

* Enable processing keys/values in content as well

* Agent unit tests and tags similar to Jinja2

---
 aixplain/modules/agent/__init__.py | 30 ++++++++------
 tests/unit/agent_test.py           | 63 ++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+), 12 deletions(-)
 create mode 100644 tests/unit/agent_test.py

diff --git a/aixplain/modules/agent/__init__.py b/aixplain/modules/agent/__init__.py
index 8a5cd120..c0604f6a 100644
--- a/aixplain/modules/agent/__init__.py
+++ b/aixplain/modules/agent/__init__.py
@@ -105,7 +105,7 @@ def run(
         timeout: float = 300,
         parameters: Dict = {},
         wait_time: float = 0.5,
-        content: List[Text] = [],
+        content: Optional[Union[Dict[Text, Text], List[Text]]] = None,
     ) -> Dict:
         """Runs an agent call.
 
@@ -118,7 +118,7 @@ def run(
         timeout (float, optional): total polling time. Defaults to 300.
         parameters (Dict, optional): optional parameters to the model. Defaults to "{}".
         wait_time (float, optional): wait time in seconds between polling calls. Defaults to 0.5.
-        content (List[Text], optional): Content inputs to be processed according to the query. Defaults to [].
+        content (Union[Dict[Text, Text], List[Text]], optional): Content inputs to be processed according to the query. Defaults to None.
 
         Returns:
             Dict: parsed output from model
@@ -156,7 +156,7 @@ def run_async(
         history: Optional[List[Dict]] = None,
         name: Text = "model_process",
         parameters: Dict = {},
-        content: List[Text] = [],
+        content: Optional[Union[Dict[Text, Text], List[Text]]] = None,
     ) -> Dict:
         """Runs asynchronously an agent call.
 
         Args:
             data (Optional[Union[Dict, Text]], optional): data to be processed by the agent. Defaults to None.
             query (Optional[Text], optional): query to be processed by the agent. Defaults to None.
             session_id (Optional[Text], optional): conversation Session ID. Defaults to None.
             history (Optional[List[Dict]], optional): chat history (in case session ID is None). Defaults to None.
             name (Text, optional): ID given to a call. Defaults to "model_process". 
parameters (Dict, optional): optional parameters to the model. Defaults to "{}". - content (List[Text], optional): Content inputs to be processed according to the query. Defaults to []. + content (Union[Dict[Text, Text], List[Text]], optional): Content inputs to be processed according to the query. Defaults to None. Returns: dict: polling URL in response @@ -183,19 +183,25 @@ def run_async( session_id = data.get("session_id") if history is None: history = data.get("history") - if len(content) == 0: - content = data.get("content", []) + if content is None: + content = data.get("content") else: query = data # process content inputs - content = list(set(content)) - if len(content) > 0: + if content is not None: assert FileFactory.check_storage_type(query) == StorageType.TEXT, "When providing 'content', query must be text." - assert len(content) <= 3, "The maximum number of content inputs is 3." - for input_link in content: - input_link = FileFactory.to_link(input_link) - query += f"\n{input_link}" + + if isinstance(content, list): + assert len(content) <= 3, "The maximum number of content inputs is 3." + for input_link in content: + input_link = FileFactory.to_link(input_link) + query += f"\n{input_link}" + elif isinstance(content, dict): + for key, value in content.items(): + assert "{{" + key + "}}" in query, f"Key '{key}' not found in query." + value = FileFactory.to_link(value) + query = query.replace("{{" + key + "}}", f"'{value}'") headers = {"x-api-key": self.api_key, "Content-Type": "application/json"} diff --git a/tests/unit/agent_test.py b/tests/unit/agent_test.py new file mode 100644 index 00000000..680fc21a --- /dev/null +++ b/tests/unit/agent_test.py @@ -0,0 +1,63 @@ +import pytest +import requests_mock +from aixplain.modules import Agent +from aixplain.utils import config + + +def test_fail_no_data_query(): + agent = Agent("123", "Test Agent") + with pytest.raises(Exception) as exc_info: + agent.run_async() + assert str(exc_info.value) == "Either 'data' or 'query' must be provided." + + +def test_fail_query_must_be_provided(): + agent = Agent("123", "Test Agent") + with pytest.raises(Exception) as exc_info: + agent.run_async(data={}) + assert str(exc_info.value) == "When providing a dictionary, 'query' must be provided." + + +def test_fail_query_as_text_when_content_not_empty(): + agent = Agent("123", "Test Agent") + with pytest.raises(Exception) as exc_info: + agent.run_async( + data={"query": "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav"}, + content=["https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav"], + ) + assert str(exc_info.value) == "When providing 'content', query must be text." + + +def test_fail_content_exceed_maximum(): + agent = Agent("123", "Test Agent") + with pytest.raises(Exception) as exc_info: + agent.run_async( + data={"query": "Transcribe the audios:"}, + content=[ + "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", + "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", + "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", + "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", + ], + ) + assert str(exc_info.value) == "The maximum number of content inputs is 3." 
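+
+# A minimal illustrative sketch of the dictionary-content path exercised
+# below (not a test; assumes a valid agent and API key): each key must match
+# a '{{key}}' placeholder in the query, and its value, converted to a link
+# when it is a file, is substituted in single quotes, e.g.
+#
+#   agent.run_async(
+#       data={"query": "Translate the text: {{input1}}"},
+#       content={"input1": "Hello, how are you?"},
+#   )
+#
+# dispatches the query "Translate the text: 'Hello, how are you?'".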
+ + +def test_fail_key_not_found(): + agent = Agent("123", "Test Agent") + with pytest.raises(Exception) as exc_info: + agent.run_async(data={"query": "Translate the text: {{input1}}"}, content={"input2": "Hello, how are you?"}) + assert str(exc_info.value) == "Key 'input2' not found in query." + + +def test_sucess_query_content(): + agent = Agent("123", "Test Agent") + with requests_mock.Mocker() as mock: + url = agent.url + headers = {"x-api-key": config.TEAM_API_KEY, "Content-Type": "application/json"} + ref_response = {"data": "Hello, how are you?", "status": "IN_PROGRESS"} + mock.post(url, headers=headers, json=ref_response) + + response = agent.run_async(data={"query": "Translate the text: {{input1}}"}, content={"input1": "Hello, how are you?"}) + assert response["status"] == ref_response["status"] + assert response["url"] == ref_response["data"] From dca1a372076ccfb2bbcc949419ab6e6c6b9e4266 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Tue, 13 Aug 2024 17:44:23 -0300 Subject: [PATCH 021/105] Eng 389 fix tests (#222) * Fixing circular import and duplicated names * Fixing the setting of function in agents --- aixplain/factories/agent_factory/__init__.py | 2 +- aixplain/modules/agent/tool/model_tool.py | 6 +- .../functional/agent/agent_functional_test.py | 4 + ...designer_test.py => designer_unit_test.py} | 78 +++++-------------- 4 files changed, 28 insertions(+), 62 deletions(-) rename tests/unit/{designer_test.py => designer_unit_test.py} (89%) diff --git a/aixplain/factories/agent_factory/__init__.py b/aixplain/factories/agent_factory/__init__.py index 6076eef6..0c73637c 100644 --- a/aixplain/factories/agent_factory/__init__.py +++ b/aixplain/factories/agent_factory/__init__.py @@ -67,7 +67,7 @@ def create( if isinstance(tool, ModelTool): tool_payload.append( { - "function": tool.function.value if tool.function is not None else None, + "function": tool.function.value, "type": "model", "description": tool.description, "supplier": tool.supplier.value["code"] if tool.supplier else None, diff --git a/aixplain/modules/agent/tool/model_tool.py b/aixplain/modules/agent/tool/model_tool.py index a5acab30..e15a8bea 100644 --- a/aixplain/modules/agent/tool/model_tool.py +++ b/aixplain/modules/agent/tool/model_tool.py @@ -24,7 +24,6 @@ from aixplain.enums.function import Function from aixplain.enums.supplier import Supplier -from aixplain.factories.model_factory import ModelFactory from aixplain.modules.agent.tool import Tool from aixplain.modules.model import Model @@ -58,7 +57,6 @@ def __init__( if function is not None: if isinstance(function, str): function = Function(function) - self.function = function try: if isinstance(supplier, dict): @@ -68,9 +66,13 @@ def __init__( if model is not None: if isinstance(model, Text) is True: + from aixplain.factories.model_factory import ModelFactory + model = ModelFactory.get(model) + function = model.function if isinstance(model.supplier, Supplier): supplier = model.supplier model = model.id self.supplier = supplier self.model = model + self.function = function diff --git a/tests/functional/agent/agent_functional_test.py b/tests/functional/agent/agent_functional_test.py index cefd34c3..58d421c8 100644 --- a/tests/functional/agent/agent_functional_test.py +++ b/tests/functional/agent/agent_functional_test.py @@ -37,6 +37,9 @@ def run_input_map(request): def test_end2end(run_input_map): + for agent in AgentFactory.list()["results"]: + agent.delete() + tools = [] if "model_tools" in 
run_input_map: for tool in run_input_map["model_tools"]: @@ -47,6 +50,7 @@ def test_end2end(run_input_map): ]: tool["supplier"] = supplier break + print("TOOL: ", tool) tools.append(AgentFactory.create_model_tool(**tool)) if "pipeline_tools" in run_input_map: for tool in run_input_map["pipeline_tools"]: diff --git a/tests/unit/designer_test.py b/tests/unit/designer_unit_test.py similarity index 89% rename from tests/unit/designer_test.py rename to tests/unit/designer_unit_test.py index 766c7e54..824fd162 100644 --- a/tests/unit/designer_test.py +++ b/tests/unit/designer_unit_test.py @@ -30,9 +30,7 @@ def test_create_node(): class BareNode(Node): pass - with mock.patch( - "aixplain.modules.pipeline.designer.Node.attach_to" - ) as mock_attach_to: + with mock.patch("aixplain.modules.pipeline.designer.Node.attach_to") as mock_attach_to: node = BareNode(number=3, label="FOO") mock_attach_to.assert_not_called() assert isinstance(node.inputs, Inputs) @@ -50,9 +48,7 @@ class FooNode(Node[FooNodeInputs, FooNodeOutputs]): inputs_class = FooNodeInputs outputs_class = FooNodeOutputs - with mock.patch( - "aixplain.modules.pipeline.designer.Node.attach_to" - ) as mock_attach_to: + with mock.patch("aixplain.modules.pipeline.designer.Node.attach_to") as mock_attach_to: node = FooNode(pipeline=pipeline, number=3, label="FOO") mock_attach_to.assert_called_once_with(pipeline) assert isinstance(node.inputs, FooNodeInputs) @@ -120,9 +116,7 @@ class AssetNode(Node): node = AssetNode() with mock.patch.object(node.inputs, "serialize") as mock_inputs_serialize: - with mock.patch.object( - node.outputs, "serialize" - ) as mock_outputs_serialize: + with mock.patch.object(node.outputs, "serialize") as mock_outputs_serialize: assert node.serialize() == { "number": node.number, "type": NodeType.ASSET, @@ -148,13 +142,10 @@ class AssetNode(Node): def test_create_param(): - class TypedParam(Param): param_type = ParamType.INPUT - with mock.patch( - "aixplain.modules.pipeline.designer.Param.attach_to" - ) as mock_attach_to: + with mock.patch("aixplain.modules.pipeline.designer.Param.attach_to") as mock_attach_to: param = TypedParam( code="param", data_type=DataType.TEXT, @@ -167,9 +158,7 @@ class TypedParam(Param): assert param.value == "foo" assert param.param_type == ParamType.INPUT - with mock.patch( - "aixplain.modules.pipeline.designer.Param.attach_to" - ) as mock_attach_to: + with mock.patch("aixplain.modules.pipeline.designer.Param.attach_to") as mock_attach_to: param = TypedParam( code="param", data_type=DataType.TEXT, @@ -186,9 +175,7 @@ class TypedParam(Param): class UnTypedParam(Param): pass - with mock.patch( - "aixplain.modules.pipeline.designer.Param.attach_to" - ) as mock_attach_to: + with mock.patch("aixplain.modules.pipeline.designer.Param.attach_to") as mock_attach_to: param = UnTypedParam( code="param", data_type=DataType.TEXT, @@ -199,9 +186,7 @@ class UnTypedParam(Param): assert param.param_type == ParamType.OUTPUT - with mock.patch( - "aixplain.modules.pipeline.designer.Param.attach_to" - ) as mock_attach_to: + with mock.patch("aixplain.modules.pipeline.designer.Param.attach_to") as mock_attach_to: param = UnTypedParam( code="param", data_type=DataType.TEXT, @@ -217,9 +202,7 @@ class AssetNode(Node): node = AssetNode() - with mock.patch( - "aixplain.modules.pipeline.designer.Param.attach_to" - ) as mock_attach_to: + with mock.patch("aixplain.modules.pipeline.designer.Param.attach_to") as mock_attach_to: param = UnTypedParam( code="param", data_type=DataType.TEXT, @@ -243,12 +226,8 @@ class 
AssetNode(Node):
 
     node = AssetNode()
 
-    with mock.patch(
-        "aixplain.modules.pipeline.designer.Param.attach_to"
-    ) as mock_attach_to:
-        param = param_cls(
-            code="param", data_type=DataType.TEXT, value="foo", node=node
-        )
+    with mock.patch("aixplain.modules.pipeline.designer.Param.attach_to") as mock_attach_to:
+        param = param_cls(code="param", data_type=DataType.TEXT, value="foo", node=node)
         mock_attach_to.assert_called_once_with(node)
         assert param.code == "param"
         assert param.data_type == DataType.TEXT
@@ -322,12 +301,8 @@ class AssetNode(Node, LinkableMixin):
 
     assert "Param not registered as output" in str(excinfo.value)
 
-    output = OutputParam(
-        code="output", data_type=DataType.TEXT, value="bar", node=a
-    )
-    input = InputParam(
-        code="input", data_type=DataType.TEXT, value="foo", node=b
-    )
+    output = OutputParam(code="output", data_type=DataType.TEXT, value="bar", node=a)
+    input = InputParam(code="input", data_type=DataType.TEXT, value="foo", node=b)
 
     with mock.patch.object(input, "back_link") as mock_back_link:
         output.link(input)
@@ -364,12 +339,8 @@ class AssetNode(Node, LinkableMixin):
 
     assert "Param not registered as input" in str(excinfo.value)
 
-    output = OutputParam(
-        code="output", data_type=DataType.TEXT, value="bar", node=a
-    )
-    input = InputParam(
-        code="input", data_type=DataType.TEXT, value="foo", node=b
-    )
+    output = OutputParam(code="output", data_type=DataType.TEXT, value="bar", node=a)
+    input = InputParam(code="input", data_type=DataType.TEXT, value="foo", node=b)
 
     with mock.patch.object(a, "link") as mock_link:
         input.back_link(output)
@@ -429,9 +400,7 @@ class AssetNode(Node, LinkableMixin):
 
     pipeline = DesignerPipeline()
 
-    with mock.patch(
-        "aixplain.modules.pipeline.designer.Link.attach_to"
-    ) as mock_attach_to:
+    with mock.patch("aixplain.modules.pipeline.designer.Link.attach_to") as mock_attach_to:
         link = Link(
             from_node=a,
             to_node=b,
@@ -588,12 +557,8 @@ class AssetNode(Node):
 
     with mock.patch.object(param_proxy, "_create_param") as mock_create_param:
         with mock.patch.object(param_proxy, "add_param") as mock_add_param:
-            param = param_proxy.create_param(
-                "foo", DataType.TEXT, "bar", is_required=True
-            )
-            mock_create_param.assert_called_once_with(
-                "foo", DataType.TEXT, "bar"
-            )
+            param = param_proxy.create_param("foo", DataType.TEXT, "bar", is_required=True)
+            mock_create_param.assert_called_once_with("foo", DataType.TEXT, "bar")
             mock_add_param.assert_called_once_with(param)
             assert param.is_required is True
 
@@ -624,19 +589,14 @@ class FooParam(Param):
 
 
 def test_node_link():
-
     class AssetNode(Node, LinkableMixin):
         type: NodeType = NodeType.ASSET
 
     a = AssetNode()
     b = AssetNode()
 
-    output = OutputParam(
-        code="output", data_type=DataType.TEXT, value="bar", node=a
-    )
-    input = InputParam(
-        code="input", data_type=DataType.TEXT, value="foo", node=b
-    )
+    output = OutputParam(code="output", data_type=DataType.TEXT, value="bar", node=a)
+    input = InputParam(code="input", data_type=DataType.TEXT, value="foo", node=b)
 
     # here too lazy to mock Link class properly
     # checking the output instance instead

From b1133683768416e3d34d7ea6185a0d14d2519ce3 Mon Sep 17 00:00:00 2001
From: Zaina Abu Shaban
Date: Tue, 20 Aug 2024 00:54:20 +0300
Subject: [PATCH 022/105] Tool Validation when creating agents (#226)

* Implemented changes

* Added test and fixed some issues; pytest-check fails regardless of whether I include or exclude the new tests. 
* Initial changes, without test * Made changes, pytest-check fails * Fixing mixup * Fixing mixup * Fixing mixup * Handled Exception properly, removed extra space * Removed .DS_Store * Only validate if model is provided by user, not function * Validating Model on ModelTool.__init__ --------- Co-authored-by: xainaz Co-authored-by: Thiago Castro Ferreira --- aixplain/factories/agent_factory/__init__.py | 2 + aixplain/modules/agent/tool/model_tool.py | 19 +++- aixplain/modules/agent/tool/pipeline_tool.py | 8 ++ aixplain/modules/pipeline/asset.py | 96 ++++---------------- tests/unit/agent_test.py | 16 ++++ 5 files changed, 60 insertions(+), 81 deletions(-) diff --git a/aixplain/factories/agent_factory/__init__.py b/aixplain/factories/agent_factory/__init__.py index 6076eef6..4de4e582 100644 --- a/aixplain/factories/agent_factory/__init__.py +++ b/aixplain/factories/agent_factory/__init__.py @@ -65,6 +65,7 @@ def create( tool_payload = [] for tool in tools: if isinstance(tool, ModelTool): + tool.validate() tool_payload.append( { "function": tool.function.value if tool.function is not None else None, @@ -76,6 +77,7 @@ def create( } ) elif isinstance(tool, PipelineTool): + tool.validate() tool_payload.append( { "assetId": tool.pipeline, diff --git a/aixplain/modules/agent/tool/model_tool.py b/aixplain/modules/agent/tool/model_tool.py index 79e4601d..c88f1ee0 100644 --- a/aixplain/modules/agent/tool/model_tool.py +++ b/aixplain/modules/agent/tool/model_tool.py @@ -24,7 +24,6 @@ from aixplain.enums.function import Function from aixplain.enums.supplier import Supplier -from aixplain.factories.model_factory import ModelFactory from aixplain.modules.agent.tool import Tool from aixplain.modules.model import Model @@ -67,13 +66,23 @@ def __init__( if model is not None: if isinstance(model, Text) is True: - from aixplain.factories.model_factory import ModelFactory - - model = ModelFactory.get(model) + self.model = model + model = self.validate() function = model.function if isinstance(model.supplier, Supplier): supplier = model.supplier model = model.id self.supplier = supplier self.model = model - self.function = function \ No newline at end of file + self.function = function + + def validate(self) -> Model: + from aixplain.factories.model_factory import ModelFactory + + try: + model = None + if self.model is not None: + model = ModelFactory.get(self.model) + return model + except Exception: + raise Exception(f"Model Tool Unavailable. Make sure Model '{self.model}' exists or you have access to it.") diff --git a/aixplain/modules/agent/tool/pipeline_tool.py b/aixplain/modules/agent/tool/pipeline_tool.py index a517b198..5ad2915a 100644 --- a/aixplain/modules/agent/tool/pipeline_tool.py +++ b/aixplain/modules/agent/tool/pipeline_tool.py @@ -50,3 +50,11 @@ def __init__( if isinstance(pipeline, Pipeline): pipeline = pipeline.id self.pipeline = pipeline + + def validate(self): + from aixplain.factories.pipeline_factory import PipelineFactory + + try: + PipelineFactory.get(self.pipeline) + except Exception: + raise Exception(f"Pipeline Tool Unavailable. 
Make sure Pipeline '{self.pipeline}' exists or you have access to it.") diff --git a/aixplain/modules/pipeline/asset.py b/aixplain/modules/pipeline/asset.py index ce168036..ad7cfa1b 100644 --- a/aixplain/modules/pipeline/asset.py +++ b/aixplain/modules/pipeline/asset.py @@ -24,7 +24,6 @@ import time import json import os -import uuid import logging from aixplain.modules.asset import Asset from aixplain.utils import config @@ -101,9 +100,7 @@ def __polling( while not completed and (end - start) < timeout: try: response_body = self.poll(poll_url, name=name) - logging.debug( - f"Polling for Pipeline: Status of polling for {name} : {response_body}" - ) + logging.debug(f"Polling for Pipeline: Status of polling for {name} : {response_body}") completed = response_body["completed"] end = time.time() @@ -112,18 +109,12 @@ def __polling( if wait_time < 60: wait_time *= 1.1 except Exception: - logging.error( - f"Polling for Pipeline: polling for {name} : Continue" - ) + logging.error(f"Polling for Pipeline: polling for {name} : Continue") if response_body and response_body["status"] == "SUCCESS": try: - logging.debug( - f"Polling for Pipeline: Final status of polling for {name} : SUCCESS - {response_body}" - ) + logging.debug(f"Polling for Pipeline: Final status of polling for {name} : SUCCESS - {response_body}") except Exception: - logging.error( - f"Polling for Pipeline: Final status of polling for {name} : ERROR - {response_body}" - ) + logging.error(f"Polling for Pipeline: Final status of polling for {name} : ERROR - {response_body}") else: logging.error( f"Polling for Pipeline: Final status of polling for {name} : No response in {timeout} seconds - {response_body}" @@ -148,9 +139,7 @@ def poll(self, poll_url: Text, name: Text = "pipeline_process") -> Dict: r = _request_with_retry("get", poll_url, headers=headers) try: resp = r.json() - logging.info( - f"Single Poll for Pipeline: Status of polling for {name} : {resp}" - ) + logging.info(f"Single Poll for Pipeline: Status of polling for {name} : {resp}") except Exception: resp = {"status": "FAILED"} return resp @@ -179,18 +168,14 @@ def run( """ start = time.time() try: - response = self.run_async( - data, data_asset=data_asset, name=name, **kwargs - ) + response = self.run_async(data, data_asset=data_asset, name=name, **kwargs) if response["status"] == "FAILED": end = time.time() response["elapsed_time"] = end - start return response poll_url = response["url"] end = time.time() - response = self.__polling( - poll_url, name=name, timeout=timeout, wait_time=wait_time - ) + response = self.__polling(poll_url, name=name, timeout=timeout, wait_time=wait_time) return response except Exception as e: error_message = f"Error in request for {name}: {str(e)}" @@ -240,10 +225,7 @@ def __prepare_payload( try: payload = json.loads(data) if isinstance(payload, dict) is False: - if ( - isinstance(payload, int) is True - or isinstance(payload, float) is True - ): + if isinstance(payload, int) is True or isinstance(payload, float) is True: payload = str(payload) payload = {"data": payload} except Exception: @@ -273,33 +255,15 @@ def __prepare_payload( try: dasset = CorpusFactory.get(str(data_asset[node_label])) asset_payload["dataAsset"]["corpus_id"] = dasset.id - if ( - len( - [ - d - for d in dasset.data - if d.id == data[node_label] - ] - ) - > 0 - ): + if len([d for d in dasset.data if d.id == data[node_label]]) > 0: data_found = True except Exception: try: - dasset = DatasetFactory.get( - str(data_asset[node_label]) - ) + dasset = 
DatasetFactory.get(str(data_asset[node_label])) asset_payload["dataAsset"]["dataset_id"] = dasset.id if ( - len( - [ - dfield - for dfield in dasset.source_data - if dasset.source_data[dfield].id - == data[node_label] - ] - ) + len([dfield for dfield in dasset.source_data if dasset.source_data[dfield].id == data[node_label]]) > 0 ): data_found = True @@ -332,11 +296,7 @@ def __prepare_payload( return payload def run_async( - self, - data: Union[Text, Dict], - data_asset: Optional[Union[Text, Dict]] = None, - name: Text = "pipeline_process", - **kwargs + self, data: Union[Text, Dict], data_asset: Optional[Union[Text, Dict]] = None, name: Text = "pipeline_process", **kwargs ) -> Dict: """Runs asynchronously a pipeline call. @@ -359,16 +319,12 @@ def run_async( payload = json.dumps(payload) call_url = f"{self.url}/{self.id}" logging.info(f"Start service for {name} - {call_url} - {payload}") - r = _request_with_retry( - "post", call_url, headers=headers, data=payload - ) + r = _request_with_retry("post", call_url, headers=headers, data=payload) resp = None try: resp = r.json() - logging.info( - f"Result of request for {name} - {r.status_code} - {resp}" - ) + logging.info(f"Result of request for {name} - {r.status_code} - {resp}") poll_url = resp["url"] response = {"status": "IN_PROGRESS", "url": poll_url} @@ -405,9 +361,7 @@ def update( for i, node in enumerate(pipeline["nodes"]): if "functionType" in node and node["functionType"] == "AI": - pipeline["nodes"][i]["functionType"] = pipeline["nodes"][ - i - ]["functionType"].lower() + pipeline["nodes"][i]["functionType"] = pipeline["nodes"][i]["functionType"].lower() # prepare payload status = "draft" if save_as_asset is True: @@ -423,9 +377,7 @@ def update( "Authorization": f"Token {api_key}", "Content-Type": "application/json", } - logging.info( - f"Start service for PUT Update Pipeline - {url} - {headers} - {json.dumps(payload)}" - ) + logging.info(f"Start service for PUT Update Pipeline - {url} - {headers} - {json.dumps(payload)}") r = _request_with_retry("put", url, headers=headers, json=payload) response = r.json() logging.info(f"Pipeline {response['id']} Updated.") @@ -440,9 +392,7 @@ def delete(self) -> None: "Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json", } - logging.info( - f"Start service for DELETE Pipeline - {url} - {headers}" - ) + logging.info(f"Start service for DELETE Pipeline - {url} - {headers}") r = _request_with_retry("delete", url, headers=headers) if r.status_code != 200: raise Exception() @@ -451,9 +401,7 @@ def delete(self) -> None: logging.error(message) raise Exception(f"{message}") - def save( - self, save_as_asset: bool = False, api_key: Optional[Text] = None - ): + def save(self, save_as_asset: bool = False, api_key: Optional[Text] = None): """Save Pipeline Args: @@ -468,9 +416,7 @@ def save( for i, node in enumerate(pipeline["nodes"]): if "functionType" in node and node["functionType"] == "AI": - pipeline["nodes"][i]["functionType"] = pipeline["nodes"][ - i - ]["functionType"].lower() + pipeline["nodes"][i]["functionType"] = pipeline["nodes"][i]["functionType"].lower() # prepare payload status = "draft" if save_as_asset is True: @@ -492,9 +438,7 @@ def save( "Authorization": f"Token {api_key}", "Content-Type": "application/json", } - logging.info( - f"Start service for Save Pipeline - {url} - {headers} - {json.dumps(payload)}" - ) + logging.info(f"Start service for Save Pipeline - {url} - {headers} - {json.dumps(payload)}") r = _request_with_retry(method, url, headers=headers, 
json=payload) response = r.json() self.id = response["id"] diff --git a/tests/unit/agent_test.py b/tests/unit/agent_test.py index 680fc21a..18c92fa3 100644 --- a/tests/unit/agent_test.py +++ b/tests/unit/agent_test.py @@ -2,6 +2,8 @@ import requests_mock from aixplain.modules import Agent from aixplain.utils import config +from aixplain.factories import AgentFactory +from aixplain.modules.agent import PipelineTool, ModelTool def test_fail_no_data_query(): @@ -61,3 +63,17 @@ def test_sucess_query_content(): response = agent.run_async(data={"query": "Translate the text: {{input1}}"}, content={"input1": "Hello, how are you?"}) assert response["status"] == ref_response["status"] assert response["url"] == ref_response["data"] + + +def test_invalid_pipelinetool(): + with pytest.raises(Exception) as exc_info: + AgentFactory.create( + name="Test", tools=[PipelineTool(pipeline="309851793", description="Test")], llm_id="6646261c6eb563165658bbb1" + ) + assert str(exc_info.value) == "Pipeline Tool Unavailable. Make sure Pipeline '309851793' exists or you have access to it." + + +def test_invalid_modeltool(): + with pytest.raises(Exception) as exc_info: + AgentFactory.create(name="Test", tools=[ModelTool(model="309851793")], llm_id="6646261c6eb563165658bbb1") + assert str(exc_info.value) == "Model Tool Unavailable. Make sure Model '309851793' exists or you have access to it." From 0032947f305cb51a1baf7609c6ab0479578ceb24 Mon Sep 17 00:00:00 2001 From: Zaina Abu Shaban Date: Tue, 20 Aug 2024 16:47:48 +0300 Subject: [PATCH 023/105] Eng 398 sdk get users credits - Initial (#232) * Add .DS_Store to .gitignore * Initial commit, added wallet factory and wallet module. No test is added yet * Added test * Formatting Wallet tests --------- Co-authored-by: Thiago Castro Ferreira Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> --- .gitignore | 2 ++ aixplain/factories/__init__.py | 1 + aixplain/factories/wallet_factoy.py | 26 ++++++++++++++++++++++ aixplain/modules/wallet.py | 34 +++++++++++++++++++++++++++++ tests/unit/wallet_test.py | 16 ++++++++++++++ 5 files changed, 79 insertions(+) create mode 100644 aixplain/factories/wallet_factoy.py create mode 100644 aixplain/modules/wallet.py create mode 100644 tests/unit/wallet_test.py diff --git a/.gitignore b/.gitignore index 843c6556..ad7c16c8 100644 --- a/.gitignore +++ b/.gitignore @@ -130,3 +130,5 @@ dmypy.json # Vscode .vscode +.DS_Store + diff --git a/aixplain/factories/__init__.py b/aixplain/factories/__init__.py index 7b876899..08cb8d4a 100644 --- a/aixplain/factories/__init__.py +++ b/aixplain/factories/__init__.py @@ -30,3 +30,4 @@ from .model_factory import ModelFactory from .pipeline_factory import PipelineFactory from .finetune_factory import FinetuneFactory +from .wallet_factoy import WalletFactory diff --git a/aixplain/factories/wallet_factoy.py b/aixplain/factories/wallet_factoy.py new file mode 100644 index 00000000..59ec7c14 --- /dev/null +++ b/aixplain/factories/wallet_factoy.py @@ -0,0 +1,26 @@ +import aixplain.utils.config as config +from aixplain.modules.wallet import Wallet +from aixplain.utils.file_utils import _request_with_retry +import logging + + +class WalletFactory: + aixplain_key = config.AIXPLAIN_API_KEY + backend_url = config.BACKEND_URL + + @classmethod + def get(cls) -> Wallet: + """Get wallet information""" + try: + resp = None + # Check for code 200, other code will be caught when trying to return a Wallet object + url = f"{cls.backend_url}/sdk/billing/wallet" + + headers = 
{"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + logging.info(f"Start fetching billing information from - {url} - {headers}") + headers = {"Content-Type": "application/json", "x-api-key": config.TEAM_API_KEY} + r = _request_with_retry("get", url, headers=headers) + resp = r.json() + return Wallet(total_balance=resp["totalBalance"], reserved_balance=resp["reservedBalance"]) + except Exception as e: + raise Exception(f"Failed to get the wallet credit information. Error: {str(e)}") diff --git a/aixplain/modules/wallet.py b/aixplain/modules/wallet.py new file mode 100644 index 00000000..d7c63524 --- /dev/null +++ b/aixplain/modules/wallet.py @@ -0,0 +1,34 @@ +__author__ = "aixplain" + +""" +Copyright 2024 The aiXplain SDK authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Author: aiXplain Team +Date: August 20th 2024 +Description: + Wallet Class +""" + + +class Wallet: + def __init__(self, total_balance: float, reserved_balance: float): + """Create a Wallet with the necessary information + + Args: + total_balance (float): total credit balance + reserved_balance (float): reserved credit balance + """ + self.total_balance = total_balance + self.reserved_balance = reserved_balance diff --git a/tests/unit/wallet_test.py b/tests/unit/wallet_test.py new file mode 100644 index 00000000..48ee19ab --- /dev/null +++ b/tests/unit/wallet_test.py @@ -0,0 +1,16 @@ +__author__ = "aixplain" + +from aixplain.factories import WalletFactory +import aixplain.utils.config as config +import requests_mock + + +def test_wallet_service(): + with requests_mock.Mocker() as mock: + url = f"{config.BACKEND_URL}/sdk/billing/wallet" + headers = {"x-api-key": config.TEAM_API_KEY, "Content-Type": "application/json"} + ref_response = {"totalBalance": 5, "reservedBalance": "0"} + mock.get(url, headers=headers, json=ref_response) + wallet = WalletFactory.get() + assert wallet.total_balance == ref_response["totalBalance"] + assert wallet.reserved_balance == ref_response["reservedBalance"] From a5675354a8cda213cd9b52508d921cca980c587f Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Tue, 20 Aug 2024 11:12:22 -0300 Subject: [PATCH 024/105] Eng 398 sdk get users credits (#234) * Add .DS_Store to .gitignore * Initial commit, added wallet factory and wallet module. 
No test is added yet * Added test * Formatting Wallet tests * wallet_factoy -> wallet_factory --------- Co-authored-by: xainaz --- aixplain/factories/__init__.py | 2 +- aixplain/factories/wallet_factory.py | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 aixplain/factories/wallet_factory.py diff --git a/aixplain/factories/__init__.py b/aixplain/factories/__init__.py index 08cb8d4a..70361e77 100644 --- a/aixplain/factories/__init__.py +++ b/aixplain/factories/__init__.py @@ -30,4 +30,4 @@ from .model_factory import ModelFactory from .pipeline_factory import PipelineFactory from .finetune_factory import FinetuneFactory -from .wallet_factoy import WalletFactory +from .wallet_factory import WalletFactory diff --git a/aixplain/factories/wallet_factory.py b/aixplain/factories/wallet_factory.py new file mode 100644 index 00000000..59ec7c14 --- /dev/null +++ b/aixplain/factories/wallet_factory.py @@ -0,0 +1,26 @@ +import aixplain.utils.config as config +from aixplain.modules.wallet import Wallet +from aixplain.utils.file_utils import _request_with_retry +import logging + + +class WalletFactory: + aixplain_key = config.AIXPLAIN_API_KEY + backend_url = config.BACKEND_URL + + @classmethod + def get(cls) -> Wallet: + """Get wallet information""" + try: + resp = None + # Check for code 200, other code will be caught when trying to return a Wallet object + url = f"{cls.backend_url}/sdk/billing/wallet" + + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + logging.info(f"Start fetching billing information from - {url} - {headers}") + headers = {"Content-Type": "application/json", "x-api-key": config.TEAM_API_KEY} + r = _request_with_retry("get", url, headers=headers) + resp = r.json() + return Wallet(total_balance=resp["totalBalance"], reserved_balance=resp["reservedBalance"]) + except Exception as e: + raise Exception(f"Failed to get the wallet credit information. 
Error: {str(e)}") From e919fab4f6c3f8f829382e5057541aaa6436e971 Mon Sep 17 00:00:00 2001 From: Zaina Abu Shaban Date: Thu, 22 Aug 2024 00:11:08 +0300 Subject: [PATCH 025/105] Removed wallet_factoy.py (#235) Co-authored-by: xainaz --- aixplain/factories/wallet_factoy.py | 26 -------------------------- 1 file changed, 26 deletions(-) delete mode 100644 aixplain/factories/wallet_factoy.py diff --git a/aixplain/factories/wallet_factoy.py b/aixplain/factories/wallet_factoy.py deleted file mode 100644 index 59ec7c14..00000000 --- a/aixplain/factories/wallet_factoy.py +++ /dev/null @@ -1,26 +0,0 @@ -import aixplain.utils.config as config -from aixplain.modules.wallet import Wallet -from aixplain.utils.file_utils import _request_with_retry -import logging - - -class WalletFactory: - aixplain_key = config.AIXPLAIN_API_KEY - backend_url = config.BACKEND_URL - - @classmethod - def get(cls) -> Wallet: - """Get wallet information""" - try: - resp = None - # Check for code 200, other code will be caught when trying to return a Wallet object - url = f"{cls.backend_url}/sdk/billing/wallet" - - headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} - logging.info(f"Start fetching billing information from - {url} - {headers}") - headers = {"Content-Type": "application/json", "x-api-key": config.TEAM_API_KEY} - r = _request_with_retry("get", url, headers=headers) - resp = r.json() - return Wallet(total_balance=resp["totalBalance"], reserved_balance=resp["reservedBalance"]) - except Exception as e: - raise Exception(f"Failed to get the wallet credit information. Error: {str(e)}") From 115bf1369025ab1a5475e4fe6e4424e05ed11378 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Thu, 22 Aug 2024 11:41:30 -0300 Subject: [PATCH 026/105] Adding supervisor/planning options into SDK (#233) * Adding supervisor/planning options into SDK * Removing llm options for supervisor and planer * Removing orchestrator option --- aixplain/factories/agent_factory/__init__.py | 31 +++++++++-- aixplain/factories/agent_factory/utils.py | 10 ++++ .../functional/agent/agent_functional_test.py | 6 ++ tests/unit/agent_test.py | 55 +++++++++++++++++++ 4 files changed, 98 insertions(+), 4 deletions(-) diff --git a/aixplain/factories/agent_factory/__init__.py b/aixplain/factories/agent_factory/__init__.py index 4de4e582..134b3560 100644 --- a/aixplain/factories/agent_factory/__init__.py +++ b/aixplain/factories/agent_factory/__init__.py @@ -34,7 +34,7 @@ from aixplain.utils import config from typing import Dict, List, Optional, Text, Union -from aixplain.factories.agent_factory.utils import build_agent +from aixplain.factories.agent_factory.utils import build_agent, validate_llm from aixplain.utils.file_utils import _request_with_retry from urllib.parse import urljoin @@ -50,8 +50,30 @@ def create( api_key: Text = config.TEAM_API_KEY, supplier: Union[Dict, Text, Supplier, int] = "aiXplain", version: Optional[Text] = None, + use_mentalist_and_inspector: bool = False, ) -> Agent: - """Create a new agent in the platform.""" + """Create a new agent in the platform. + + Args: + name (Text): name of the agent + llm_id (Text): aiXplain ID of the large language model to be used as agent. + tools (List[Tool], optional): list of tool for the agent. Defaults to []. + description (Text, optional): description of the agent role. Defaults to "". + api_key (Text, optional): team/user API key. Defaults to config.TEAM_API_KEY. 
+ supplier (Union[Dict, Text, Supplier, int], optional): owner of the agent. Defaults to "aiXplain". + version (Optional[Text], optional): version of the agent. Defaults to None. + use_mentalist_and_inspector (bool, optional): flag to enable mentalist and inspector agents (which only works when a supervisor is enabled). Defaults to False. + + Returns: + Agent: created Agent + """ + # validate LLM ID + validate_llm(llm_id) + + orchestrator_llm_id, mentalist_and_inspector_llm_id = llm_id, None + if use_mentalist_and_inspector is True: + mentalist_and_inspector_llm_id = llm_id + try: agent = None url = urljoin(config.BACKEND_URL, "sdk/agents") @@ -94,9 +116,10 @@ def create( "description": description, "supplier": supplier, "version": version, + "llmId": llm_id, + "supervisorId": orchestrator_llm_id, + "plannerId": mentalist_and_inspector_llm_id, } - if llm_id is not None: - payload["llmId"] = llm_id logging.info(f"Start service for POST Create Agent - {url} - {headers} - {json.dumps(payload)}") r = _request_with_retry("post", url, headers=headers, json=payload) diff --git a/aixplain/factories/agent_factory/utils.py b/aixplain/factories/agent_factory/utils.py index 4b314ef7..6aed75ae 100644 --- a/aixplain/factories/agent_factory/utils.py +++ b/aixplain/factories/agent_factory/utils.py @@ -47,3 +47,13 @@ def build_agent(payload: Dict, api_key: Text = config.TEAM_API_KEY) -> Agent: ) agent.url = urljoin(config.BACKEND_URL, f"sdk/agents/{agent.id}/run") return agent + + +def validate_llm(model_id: Text) -> None: + from aixplain.factories.model_factory import ModelFactory + + try: + llm = ModelFactory.get(model_id) + assert llm.function == Function.TEXT_GENERATION, "Large Language Model must be a text generation model." + except Exception: + raise Exception(f"Large Language Model with ID '{model_id}' not found.") diff --git a/tests/functional/agent/agent_functional_test.py b/tests/functional/agent/agent_functional_test.py index f6ff0408..0acdb5be 100644 --- a/tests/functional/agent/agent_functional_test.py +++ b/tests/functional/agent/agent_functional_test.py @@ -75,3 +75,9 @@ def test_list_agents(): assert "results" in agents agents_result = agents["results"] assert type(agents_result) is list + + +def test_fail_non_existent_llm(): + with pytest.raises(Exception) as exc_info: + AgentFactory.create(name="Test Agent", llm_id="non_existent_llm", tools=[]) + assert str(exc_info.value) == "Large Language Model with ID 'non_existent_llm' not found." diff --git a/tests/unit/agent_test.py b/tests/unit/agent_test.py index 18c92fa3..8a619011 100644 --- a/tests/unit/agent_test.py +++ b/tests/unit/agent_test.py @@ -4,6 +4,7 @@ from aixplain.utils import config from aixplain.factories import AgentFactory from aixplain.modules.agent import PipelineTool, ModelTool +from urllib.parse import urljoin def test_fail_no_data_query(): @@ -77,3 +78,57 @@ def test_invalid_modeltool(): with pytest.raises(Exception) as exc_info: AgentFactory.create(name="Test", tools=[ModelTool(model="309851793")], llm_id="6646261c6eb563165658bbb1") assert str(exc_info.value) == "Model Tool Unavailable. Make sure Model '309851793' exists or you have access to it." 
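A minimal usage sketch for the supervisor/planner option added in this commit -- the agent name and description are illustrative placeholders, and the LLM ID is the text-generation model mocked in the unit test below; validate_llm rejects any ID that does not resolve to a text-generation model:

    from aixplain.factories import AgentFactory

    # Assumption: this ID resolves to an accessible text-generation model.
    agent = AgentFactory.create(
        name="Example Agent",
        description="Illustrative agent",
        llm_id="6646261c6eb563165658bbb1",
        use_mentalist_and_inspector=True,  # reuses llm_id as the mentalist/inspector LLM
    )

When the flag is left at its default of False, only the supervisor LLM is configured and plannerId is sent as None.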
+ + +def test_create_agent(): + from aixplain.enums import Supplier + + with requests_mock.Mocker() as mock: + url = urljoin(config.BACKEND_URL, "sdk/agents") + headers = {"x-api-key": config.TEAM_API_KEY, "Content-Type": "application/json"} + + ref_response = { + "id": "123", + "name": "Test Agent", + "description": "Test Agent Description", + "teamId": "123", + "version": "1.0", + "status": "onboarded", + "llmId": "6646261c6eb563165658bbb1", + "pricing": {"currency": "USD", "value": 0.0}, + "assets": [ + { + "type": "model", + "supplier": "openai", + "version": "1.0", + "assetId": "6646261c6eb563165658bbb1", + "function": "text-generation", + } + ], + } + mock.post(url, headers=headers, json=ref_response) + + url = urljoin(config.BACKEND_URL, "sdk/models/6646261c6eb563165658bbb1") + model_ref_response = { + "id": "6646261c6eb563165658bbb1", + "name": "Test LLM", + "description": "Test LLM Description", + "function": {"id": "text-generation"}, + "supplier": "openai", + "version": {"id": "1.0"}, + "status": "onboarded", + "pricing": {"currency": "USD", "value": 0.0}, + } + mock.get(url, headers=headers, json=model_ref_response) + + agent = AgentFactory.create( + name="Test Agent", + description="Test Agent Description", + llm_id="6646261c6eb563165658bbb1", + tools=[AgentFactory.create_model_tool(supplier=Supplier.OPENAI, function="text-generation")], + ) + + assert agent.name == ref_response["name"] + assert agent.description == ref_response["description"] + assert agent.llm_id == ref_response["llmId"] + assert agent.tools[0].function.value == ref_response["assets"][0]["function"] From 3357e56e1d66d21891d3262c6ab5ef69e3e9d84a Mon Sep 17 00:00:00 2001 From: Zaina Abu Shaban Date: Fri, 23 Aug 2024 18:10:54 +0300 Subject: [PATCH 027/105] Adjustments to get user credits (#237) * Removed extra wallet_factoy.py * Wallet API issue * Added changed --------- Co-authored-by: xainaz --- aixplain/factories/wallet_factory.py | 9 ++++----- tests/unit/wallet_test.py | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/aixplain/factories/wallet_factory.py b/aixplain/factories/wallet_factory.py index 59ec7c14..b0a55b65 100644 --- a/aixplain/factories/wallet_factory.py +++ b/aixplain/factories/wallet_factory.py @@ -2,6 +2,7 @@ from aixplain.modules.wallet import Wallet from aixplain.utils.file_utils import _request_with_retry import logging +from typing import Text class WalletFactory: @@ -9,16 +10,14 @@ class WalletFactory: backend_url = config.BACKEND_URL @classmethod - def get(cls) -> Wallet: + def get(cls, api_key: Text = config.TEAM_API_KEY) -> Wallet: """Get wallet information""" try: resp = None - # Check for code 200, other code will be caught when trying to return a Wallet object url = f"{cls.backend_url}/sdk/billing/wallet" - - headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"} logging.info(f"Start fetching billing information from - {url} - {headers}") - headers = {"Content-Type": "application/json", "x-api-key": config.TEAM_API_KEY} + headers = {"Content-Type": "application/json", "x-api-key": api_key} r = _request_with_retry("get", url, headers=headers) resp = r.json() return Wallet(total_balance=resp["totalBalance"], reserved_balance=resp["reservedBalance"]) diff --git a/tests/unit/wallet_test.py b/tests/unit/wallet_test.py index 48ee19ab..16561dba 100644 --- a/tests/unit/wallet_test.py +++ b/tests/unit/wallet_test.py @@ -11,6 +11,6 @@ def 
test_wallet_service(): headers = {"x-api-key": config.TEAM_API_KEY, "Content-Type": "application/json"} ref_response = {"totalBalance": 5, "reservedBalance": "0"} mock.get(url, headers=headers, json=ref_response) - wallet = WalletFactory.get() + wallet = WalletFactory.get(config.AIXPLAIN_API_KEY) assert wallet.total_balance == ref_response["totalBalance"] assert wallet.reserved_balance == ref_response["reservedBalance"] From ee76afdacd3b73add74f5117047cfd390169bd59 Mon Sep 17 00:00:00 2001 From: Zaina Abu Shaban Date: Fri, 23 Aug 2024 23:27:24 +0300 Subject: [PATCH 028/105] Put conditions inside try statements according to changes required. (#231) * Fixing mixup * Fixing mixup * Fixing mixup * Put conditions inside try statements according to changes required. * Changes made * Logging the error + changing the range of error codes * Logging the error + changing the range of error codes * Logging the error + Account * Added elaborative error messages * Added test for model errors * Added all tests --------- Co-authored-by: zaina826 Co-authored-by: xainaz --- .gitignore | 1 - aixplain/modules/model/__init__.py | 26 +++++++++++++++++---- aixplain/modules/model/llm_model.py | 26 +++++++++++++++++---- aixplain/modules/pipeline/asset.py | 26 +++++++++++++++++---- tests/unit/llm_test.py | 36 +++++++++++++++++++++++++++++ tests/unit/model_test.py | 29 ++++++++++++++++++++++- tests/unit/pipeline_test.py | 26 +++++++++++++++++++++ 7 files changed, 153 insertions(+), 17 deletions(-) create mode 100644 tests/unit/llm_test.py diff --git a/.gitignore b/.gitignore index ad7c16c8..3ec74da5 100644 --- a/.gitignore +++ b/.gitignore @@ -131,4 +131,3 @@ dmypy.json # Vscode .vscode .DS_Store - diff --git a/aixplain/modules/model/__init__.py b/aixplain/modules/model/__init__.py index 4be40225..8fcd80d2 100644 --- a/aixplain/modules/model/__init__.py +++ b/aixplain/modules/model/__init__.py @@ -239,11 +239,27 @@ def run_async(self, data: Union[Text, Dict], name: Text = "model_process", param resp = None try: - resp = r.json() - logging.info(f"Result of request for {name} - {r.status_code} - {resp}") - - poll_url = resp["data"] - response = {"status": "IN_PROGRESS", "url": poll_url} + if 200 <= r.status_code < 300: + resp = r.json() + logging.info(f"Result of request for {name} - {r.status_code} - {resp}") + poll_url = resp["data"] + response = {"status": "IN_PROGRESS", "url": poll_url} + else: + if r.status_code == 401: + error = "Unauthorized API key: Please verify the spelling of the API key and its current validity." + elif 460 <= r.status_code < 470: + error = "Subscription-related error: Please ensure that your subscription is active and has not expired." + elif 470 <= r.status_code < 480: + error = "Billing-related error: Please ensure you have enough credits to run this model. " + elif 480 <= r.status_code < 490: + error = "Supplier-related error: Please ensure that the selected supplier provides the model you are trying to access." + elif 490 <= r.status_code < 500: + error = "Validation-related error: Please ensure all required fields are provided and correctly formatted." + else: + status_code = str(r.status_code) + error = f"Status {status_code}: Unspecified error: An unspecified error occurred while processing your request." 
+ response = {"status": "FAILED", "error_message": error} + logging.error(f"Error in request for {name} - {r.status_code}: {error}") except Exception: response = {"status": "FAILED"} msg = f"Error in request for {name} - {traceback.format_exc()}" diff --git a/aixplain/modules/model/llm_model.py b/aixplain/modules/model/llm_model.py index 5c5c4140..c595d207 100644 --- a/aixplain/modules/model/llm_model.py +++ b/aixplain/modules/model/llm_model.py @@ -214,11 +214,27 @@ def run_async( resp = None try: - resp = r.json() - logging.info(f"Result of request for {name} - {r.status_code} - {resp}") - - poll_url = resp["data"] - response = {"status": "IN_PROGRESS", "url": poll_url} + if 200 <= r.status_code < 300: + resp = r.json() + logging.info(f"Result of request for {name} - {r.status_code} - {resp}") + poll_url = resp["data"] + response = {"status": "IN_PROGRESS", "url": poll_url} + else: + if r.status_code == 401: + error = "Unauthorized API key: Please verify the spelling of the API key and its current validity." + elif 460 <= r.status_code < 470: + error = "Subscription-related error: Please ensure that your subscription is active and has not expired." + elif 470 <= r.status_code < 480: + error = "Billing-related error: Please ensure you have enough credits to run this model. " + elif 480 <= r.status_code < 490: + error = "Supplier-related error: Please ensure that the selected supplier provides the model you are trying to access." + elif 490 <= r.status_code < 500: + error = "Validation-related error: Please ensure all required fields are provided and correctly formatted." + else: + status_code = str(r.status_code) + error = f"Status {status_code}: Unspecified error: An unspecified error occurred while processing your request." + response = {"status": "FAILED", "error_message": error} + logging.error(f"Error in request for {name} - {r.status_code}: {error}") except Exception: response = {"status": "FAILED"} msg = f"Error in request for {name} - {traceback.format_exc()}" diff --git a/aixplain/modules/pipeline/asset.py b/aixplain/modules/pipeline/asset.py index ad7cfa1b..860a08a5 100644 --- a/aixplain/modules/pipeline/asset.py +++ b/aixplain/modules/pipeline/asset.py @@ -323,11 +323,27 @@ def run_async( resp = None try: - resp = r.json() - logging.info(f"Result of request for {name} - {r.status_code} - {resp}") - - poll_url = resp["url"] - response = {"status": "IN_PROGRESS", "url": poll_url} + if 200 <= r.status_code < 300: + resp = r.json() + logging.info(f"Result of request for {name} - {r.status_code} - {resp}") + poll_url = resp["url"] + response = {"status": "IN_PROGRESS", "url": poll_url} + else: + if r.status_code == 401: + error = "Unauthorized API key: Please verify the spelling of the API key and its current validity." + elif 460 <= r.status_code < 470: + error = "Subscription-related error: Please ensure that your subscription is active and has not expired." + elif 470 <= r.status_code < 480: + error = "Billing-related error: Please ensure you have enough credits to run this pipeline. " + elif 480 <= r.status_code < 490: + error = "Supplier-related error: Please ensure that the selected supplier provides the pipeline you are trying to access." + elif 490 <= r.status_code < 500: + error = "Validation-related error: Please ensure all required fields are provided and correctly formatted." + else: + status_code = str(r.status_code) + error = f"Status {status_code}: Unspecified error: An unspecified error occurred while processing your request." 
+ response = {"status": "FAILED", "error_message": error} + logging.error(f"Error in request for {name} - {r.status_code}: {error}") except Exception: response = {"status": "FAILED"} if resp is not None: diff --git a/tests/unit/llm_test.py b/tests/unit/llm_test.py new file mode 100644 index 00000000..430fc338 --- /dev/null +++ b/tests/unit/llm_test.py @@ -0,0 +1,36 @@ + +from dotenv import load_dotenv +from urllib.parse import urljoin +import requests_mock +from aixplain.enums import Function + +load_dotenv() +from aixplain.utils import config +from aixplain.modules import LLM + +import pytest + +@pytest.mark.parametrize( + "status_code,error_message", + [ + (401,"Unauthorized API key: Please verify the spelling of the API key and its current validity."), + (465,"Subscription-related error: Please ensure that your subscription is active and has not expired."), + (475,"Billing-related error: Please ensure you have enough credits to run this model. "), + (485, "Supplier-related error: Please ensure that the selected supplier provides the model you are trying to access."), + (495, "Validation-related error: Please ensure all required fields are provided and correctly formatted."), + (501, "Status 501: Unspecified error: An unspecified error occurred while processing your request."), + + ], +) + +def test_run_async_errors(status_code, error_message): + base_url = config.MODELS_RUN_URL + llm_id = "llm-id" + execute_url = urljoin(base_url, f"execute/{llm_id}") + + with requests_mock.Mocker() as mock: + mock.post(execute_url, status_code=status_code) + test_llm = LLM(id=llm_id, name="Test llm",url=base_url, function=Function.TEXT_GENERATION) + response = test_llm.run_async(data="input_data") + assert response["status"] == "FAILED" + assert response["error_message"] == error_message \ No newline at end of file diff --git a/tests/unit/model_test.py b/tests/unit/model_test.py index 269c821e..cd6f7a5a 100644 --- a/tests/unit/model_test.py +++ b/tests/unit/model_test.py @@ -17,10 +17,11 @@ """ from dotenv import load_dotenv +from urllib.parse import urljoin +import requests_mock load_dotenv() import re -import requests_mock from aixplain.utils import config from aixplain.modules import Model @@ -57,3 +58,29 @@ def test_failed_poll(): assert hyp_response["error"] == ref_response["error"] assert hyp_response["supplierError"] == ref_response["supplierError"] assert hyp_response["status"] == "FAILED" + + +@pytest.mark.parametrize( + "status_code,error_message", + [ + (401,"Unauthorized API key: Please verify the spelling of the API key and its current validity."), + (465,"Subscription-related error: Please ensure that your subscription is active and has not expired."), + (475,"Billing-related error: Please ensure you have enough credits to run this model. 
"), + (485, "Supplier-related error: Please ensure that the selected supplier provides the model you are trying to access."), + (495, "Validation-related error: Please ensure all required fields are provided and correctly formatted."), + (501, "Status 501: Unspecified error: An unspecified error occurred while processing your request."), + + ], +) + +def test_run_async_errors(status_code, error_message): + base_url = config.MODELS_RUN_URL + model_id = "model-id" + execute_url = urljoin(base_url, f"execute/{model_id}") + + with requests_mock.Mocker() as mock: + mock.post(execute_url, status_code=status_code) + test_model = Model(id=model_id, name="Test Model",url=base_url) + response = test_model.run_async(data="input_data") + assert response["status"] == "FAILED" + assert response["error_message"] == error_message \ No newline at end of file diff --git a/tests/unit/pipeline_test.py b/tests/unit/pipeline_test.py index e983a298..d3c1c725 100644 --- a/tests/unit/pipeline_test.py +++ b/tests/unit/pipeline_test.py @@ -17,6 +17,7 @@ """ from dotenv import load_dotenv +import pytest load_dotenv() import requests_mock @@ -36,3 +37,28 @@ def test_create_pipeline(): hyp_pipeline = PipelineFactory.create(pipeline={"nodes": []}, name="Pipeline Test") assert hyp_pipeline.id == ref_pipeline.id assert hyp_pipeline.name == ref_pipeline.name + +@pytest.mark.parametrize( + "status_code,error_message", + [ + (401,"Unauthorized API key: Please verify the spelling of the API key and its current validity."), + (465,"Subscription-related error: Please ensure that your subscription is active and has not expired."), + (475,"Billing-related error: Please ensure you have enough credits to run this pipeline. "), + (485, "Supplier-related error: Please ensure that the selected supplier provides the pipeline you are trying to access."), + (495, "Validation-related error: Please ensure all required fields are provided and correctly formatted."), + (501, "Status 501: Unspecified error: An unspecified error occurred while processing your request."), + + ], +) + +def test_run_async_errors(status_code, error_message): + base_url = config.BACKEND_URL + pipeline_id = "pipeline_id" + execute_url = f"{base_url}/assets/pipeline/execution/run/{pipeline_id}" + + with requests_mock.Mocker() as mock: + mock.post(execute_url, status_code=status_code) + test_pipeline = Pipeline(id=pipeline_id, api_key=config.TEAM_API_KEY, name="Test Pipeline", url=base_url) + response = test_pipeline.run_async(data="input_data") + assert response["status"] == "FAILED" + assert response["error_message"] == error_message \ No newline at end of file From 1660f5ff297c0816fcdacc5451acf02c1b55ec93 Mon Sep 17 00:00:00 2001 From: Zaina Abu Shaban Date: Tue, 27 Aug 2024 17:58:42 +0300 Subject: [PATCH 029/105] Fixing none credit (#238) * Removed extra wallet_factoy.py * Initial Commit * Added total balance attribute and tested it --------- Co-authored-by: xainaz Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> --- aixplain/factories/wallet_factory.py | 5 ++++- aixplain/modules/wallet.py | 9 +++++---- tests/unit/wallet_test.py | 7 ++++--- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/aixplain/factories/wallet_factory.py b/aixplain/factories/wallet_factory.py index b0a55b65..b36000f1 100644 --- a/aixplain/factories/wallet_factory.py +++ b/aixplain/factories/wallet_factory.py @@ -20,6 +20,9 @@ def get(cls, api_key: Text = config.TEAM_API_KEY) -> Wallet: headers = {"Content-Type": "application/json", 
"x-api-key": api_key} r = _request_with_retry("get", url, headers=headers) resp = r.json() - return Wallet(total_balance=resp["totalBalance"], reserved_balance=resp["reservedBalance"]) + total_balance = float(resp.get("totalBalance", 0.0)) + reserved_balance = float(resp.get("reservedBalance", 0.0)) + + return Wallet(total_balance=total_balance, reserved_balance=reserved_balance) except Exception as e: raise Exception(f"Failed to get the wallet credit information. Error: {str(e)}") diff --git a/aixplain/modules/wallet.py b/aixplain/modules/wallet.py index d7c63524..d61b04ee 100644 --- a/aixplain/modules/wallet.py +++ b/aixplain/modules/wallet.py @@ -24,11 +24,12 @@ class Wallet: def __init__(self, total_balance: float, reserved_balance: float): - """Create a Wallet with the necessary information - + """ Args: - total_balance (float): total credit balance - reserved_balance (float): reserved credit balance + total_balance (float) + reserved_balance (float) + available_balance (float) """ self.total_balance = total_balance self.reserved_balance = reserved_balance + self.available_balance = total_balance-reserved_balance diff --git a/tests/unit/wallet_test.py b/tests/unit/wallet_test.py index 16561dba..50acbbdb 100644 --- a/tests/unit/wallet_test.py +++ b/tests/unit/wallet_test.py @@ -11,6 +11,7 @@ def test_wallet_service(): headers = {"x-api-key": config.TEAM_API_KEY, "Content-Type": "application/json"} ref_response = {"totalBalance": 5, "reservedBalance": "0"} mock.get(url, headers=headers, json=ref_response) - wallet = WalletFactory.get(config.AIXPLAIN_API_KEY) - assert wallet.total_balance == ref_response["totalBalance"] - assert wallet.reserved_balance == ref_response["reservedBalance"] + wallet = WalletFactory.get() + assert wallet.total_balance == float(ref_response["totalBalance"]) + assert wallet.reserved_balance == float(ref_response["reservedBalance"]) + assert wallet.available_balance == float(ref_response["totalBalance"]) - float(ref_response["reservedBalance"]) From 9a89f5230cbc4ec70bf571092c96c1f09e2b5a22 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Wed, 28 Aug 2024 16:52:41 -0300 Subject: [PATCH 030/105] Update click dependency (#241) --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5b0ded4b..be397bdd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ namespaces = true [project] name = "aiXplain" -version = "0.2.13rc2" +version = "0.2.18" description = "aiXplain SDK adds AI functions to software." 
readme = "README.md" requires-python = ">=3.5, <4" @@ -49,7 +49,7 @@ dependencies = [ "python-dotenv>=1.0.0", "validators>=0.20.0", "filetype>=1.2.0", - "click>=7.1.2,<8.0.0", + "click>=7.1.2", "PyYAML>=6.0.1", "dataclasses-json>=0.5.2", "Jinja2==3.1.4", From cb0d3139e8065dfd5e54ddeb72604a08e9aebb1e Mon Sep 17 00:00:00 2001 From: Zaina Abu Shaban Date: Tue, 3 Sep 2024 00:04:56 +0300 Subject: [PATCH 031/105] Added input and output attributes to model (#244) * Added input and output attributes to model * Added correct test * Fixed model class * Fixed model factory * Getting the parameters from right source and add functional test --------- Co-authored-by: xainaz Co-authored-by: Thiago Castro Ferreira --- aixplain/factories/model_factory.py | 51 ++++++++++++------- aixplain/modules/model/__init__.py | 8 ++- .../general_assets/asset_functional_test.py | 22 ++++++++ tests/unit/model_test.py | 14 +++-- 4 files changed, 69 insertions(+), 26 deletions(-) diff --git a/aixplain/factories/model_factory.py b/aixplain/factories/model_factory.py index c11d837a..da44600c 100644 --- a/aixplain/factories/model_factory.py +++ b/aixplain/factories/model_factory.py @@ -30,6 +30,7 @@ from aixplain.utils.file_utils import _request_with_retry from urllib.parse import urljoin from warnings import warn +from aixplain.enums.function import FunctionInputOutput class ModelFactory: @@ -66,6 +67,12 @@ def _create_model_from_response(cls, response: Dict) -> Model: if function == Function.TEXT_GENERATION: ModelClass = LLM + function_id = response["function"]["id"] + function = Function(function_id) + function_io = FunctionInputOutput.get(function_id, None) + input_params = {param["code"]: param for param in function_io["spec"]["params"]} + output_params = {param["code"]: param for param in function_io["spec"]["output"]} + return ModelClass( response["id"], response["name"], @@ -74,6 +81,8 @@ def _create_model_from_response(cls, response: Dict) -> Model: cost=response["pricing"], function=function, parameters=parameters, + input_params=input_params, + output_params=output_params, is_subscribed=True if "subscription" in response else False, version=response["version"]["id"], ) @@ -270,7 +279,7 @@ def list_host_machines(cls, api_key: Optional[Text] = None) -> List[Dict]: for dictionary in response_dicts: del dictionary["id"] return response_dicts - + @classmethod def list_gpus(cls, api_key: Optional[Text] = None) -> List[List[Text]]: """List GPU names on which you can host your language model. @@ -335,7 +344,7 @@ def create_asset_repo( input_modality: Text, output_modality: Text, documentation_url: Optional[Text] = "", - api_key: Optional[Text] = None + api_key: Optional[Text] = None, ) -> Dict: """Creates an image repository for this model and registers it in the platform backend. 
@@ -362,7 +371,7 @@ def create_asset_repo( function_id = function_dict["id"] if function_id is None: raise Exception(f"Invalid function name {function}") - create_url = urljoin(config.BACKEND_URL, f"sdk/models/onboard") + create_url = urljoin(config.BACKEND_URL, "sdk/models/onboard") logging.debug(f"URL: {create_url}") if api_key: headers = {"x-api-key": f"{api_key}", "Content-Type": "application/json"} @@ -373,19 +382,14 @@ def create_asset_repo( "model": { "name": name, "description": description, - "connectionType": [ - "synchronous" - ], + "connectionType": ["synchronous"], "function": function_id, - "modalities": [ - f"{input_modality}-{output_modality}" - ], + "modalities": [f"{input_modality}-{output_modality}"], "documentationUrl": documentation_url, - "sourceLanguage": source_language + "sourceLanguage": source_language, }, "source": "aixplain-ecr", - "onboardingParams": { - } + "onboardingParams": {}, } logging.debug(f"Body: {str(payload)}") response = _request_with_retry("post", create_url, headers=headers, json=payload) @@ -412,12 +416,18 @@ def asset_repo_login(cls, api_key: Optional[Text] = None) -> Dict: else: headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} response = _request_with_retry("post", login_url, headers=headers) - print(f"Response: {response}") response_dict = json.loads(response.text) return response_dict @classmethod - def onboard_model(cls, model_id: Text, image_tag: Text, image_hash: Text, host_machine: Optional[Text] = "", api_key: Optional[Text] = None) -> Dict: + def onboard_model( + cls, + model_id: Text, + image_tag: Text, + image_hash: Text, + host_machine: Optional[Text] = "", + api_key: Optional[Text] = None, + ) -> Dict: """Onboard a model after its image has been pushed to ECR. Args: @@ -446,7 +456,14 @@ def onboard_model(cls, model_id: Text, image_tag: Text, image_hash: Text, host_m return response @classmethod - def deploy_huggingface_model(cls, name: Text, hf_repo_id: Text, revision: Optional[Text] = "", hf_token: Optional[Text] = "", api_key: Optional[Text] = None) -> Dict: + def deploy_huggingface_model( + cls, + name: Text, + hf_repo_id: Text, + revision: Optional[Text] = "", + hf_token: Optional[Text] = "", + api_key: Optional[Text] = None, + ) -> Dict: """Onboards and deploys a Hugging Face large language model. Args: @@ -477,8 +494,8 @@ def deploy_huggingface_model(cls, name: Text, hf_repo_id: Text, revision: Option "hf_supplier": supplier, "hf_model_name": model_name, "hf_token": hf_token, - "revision": revision - } + "revision": revision, + }, } response = _request_with_retry("post", deploy_url, headers=headers, json=body) logging.debug(response.text) diff --git a/aixplain/modules/model/__init__.py b/aixplain/modules/model/__init__.py index 8fcd80d2..e18f1896 100644 --- a/aixplain/modules/model/__init__.py +++ b/aixplain/modules/model/__init__.py @@ -48,6 +48,8 @@ class Model(Asset): backend_url (str): URL of the backend. pricing (Dict, optional): model price. Defaults to None. **additional_info: Any additional Model info to be saved + input_params (Dict, optional): input parameters for the function. + output_params (Dict, optional): output parameters for the function. 
""" def __init__( @@ -61,6 +63,8 @@ def __init__( function: Optional[Function] = None, is_subscribed: bool = False, cost: Optional[Dict] = None, + input_params: Optional[Dict] = None, + output_params: Optional[Dict] = None, **additional_info, ) -> None: """Model Init @@ -84,6 +88,8 @@ def __init__( self.backend_url = config.BACKEND_URL self.function = function self.is_subscribed = is_subscribed + self.input_params = input_params + self.output_params = output_params def to_dict(self) -> Dict: """Get the model info as a Dictionary @@ -92,7 +98,7 @@ def to_dict(self) -> Dict: Dict: Model Information """ clean_additional_info = {k: v for k, v in self.additional_info.items() if v is not None} - return {"id": self.id, "name": self.name, "supplier": self.supplier, "additional_info": clean_additional_info} + return {"id": self.id, "name": self.name, "supplier": self.supplier, "additional_info": clean_additional_info, "input_params": self.input_params,"output_params": self.output_params,} def __repr__(self): try: diff --git a/tests/functional/general_assets/asset_functional_test.py b/tests/functional/general_assets/asset_functional_test.py index d35a4d9a..b0d8f6ef 100644 --- a/tests/functional/general_assets/asset_functional_test.py +++ b/tests/functional/general_assets/asset_functional_test.py @@ -112,3 +112,25 @@ def test_llm_instantiation(): """Test that the LLM model is correctly instantiated.""" models = ModelFactory.list(function=Function.TEXT_GENERATION)["results"] assert isinstance(models[0], LLM) + + +def test_model_io(): + model_id = "64aee5824d34b1221e70ac07" + model = ModelFactory.get(model_id) + + expected_input = { + "text": { + "name": "Text Prompt", + "code": "text", + "required": True, + "isFixed": False, + "dataType": "text", + "dataSubType": "text", + "multipleValues": False, + "defaultValues": [], + } + } + expected_output = {"data": {"name": "Generated Image", "code": "data", "defaultValue": [], "dataType": "image"}} + + assert model.input_params == expected_input + assert model.output_params == expected_output diff --git a/tests/unit/model_test.py b/tests/unit/model_test.py index cd6f7a5a..c52bb950 100644 --- a/tests/unit/model_test.py +++ b/tests/unit/model_test.py @@ -63,24 +63,22 @@ def test_failed_poll(): @pytest.mark.parametrize( "status_code,error_message", [ - (401,"Unauthorized API key: Please verify the spelling of the API key and its current validity."), - (465,"Subscription-related error: Please ensure that your subscription is active and has not expired."), - (475,"Billing-related error: Please ensure you have enough credits to run this model. "), + (401, "Unauthorized API key: Please verify the spelling of the API key and its current validity."), + (465, "Subscription-related error: Please ensure that your subscription is active and has not expired."), + (475, "Billing-related error: Please ensure you have enough credits to run this model. 
"), (485, "Supplier-related error: Please ensure that the selected supplier provides the model you are trying to access."), (495, "Validation-related error: Please ensure all required fields are provided and correctly formatted."), (501, "Status 501: Unspecified error: An unspecified error occurred while processing your request."), - ], ) - def test_run_async_errors(status_code, error_message): base_url = config.MODELS_RUN_URL model_id = "model-id" execute_url = urljoin(base_url, f"execute/{model_id}") - + with requests_mock.Mocker() as mock: mock.post(execute_url, status_code=status_code) - test_model = Model(id=model_id, name="Test Model",url=base_url) + test_model = Model(id=model_id, name="Test Model", url=base_url) response = test_model.run_async(data="input_data") assert response["status"] == "FAILED" - assert response["error_message"] == error_message \ No newline at end of file + assert response["error_message"] == error_message From 716d898144db58a3adeffa01441f80fbaa09bfbb Mon Sep 17 00:00:00 2001 From: Zaina Abu Shaban Date: Tue, 3 Sep 2024 20:02:39 +0300 Subject: [PATCH 032/105] Eng 467 ai xplain sdk update finetune functional tests to cover all new finetunable models (#242) * Initial commit for finetune test * Added createdAt * Added correct tests * Added test, issue with dev, not passing pytest * Added test, issue with dev, not passing pytest * Added createdAt * Added createdAt * Added createdAt --------- Co-authored-by: xainaz Co-authored-by: Thiago Castro Ferreira --- aixplain/factories/model_factory.py | 5 +++ aixplain/modules/model/__init__.py | 20 +++++++-- .../finetune/finetune_functional_test.py | 42 +++++++++++++------ 3 files changed, 50 insertions(+), 17 deletions(-) diff --git a/aixplain/factories/model_factory.py b/aixplain/factories/model_factory.py index da44600c..d82bdd63 100644 --- a/aixplain/factories/model_factory.py +++ b/aixplain/factories/model_factory.py @@ -31,6 +31,7 @@ from urllib.parse import urljoin from warnings import warn from aixplain.enums.function import FunctionInputOutput +from datetime import datetime class ModelFactory: @@ -67,6 +68,9 @@ def _create_model_from_response(cls, response: Dict) -> Model: if function == Function.TEXT_GENERATION: ModelClass = LLM + created_at = None + if "createdAt" in response and response["createdAt"]: + created_at = datetime.fromisoformat(response["createdAt"].replace("Z", "+00:00")) function_id = response["function"]["id"] function = Function(function_id) function_io = FunctionInputOutput.get(function_id, None) @@ -80,6 +84,7 @@ def _create_model_from_response(cls, response: Dict) -> Model: api_key=response["api_key"], cost=response["pricing"], function=function, + created_at=created_at, parameters=parameters, input_params=input_params, output_params=output_params, diff --git a/aixplain/modules/model/__init__.py b/aixplain/modules/model/__init__.py index e18f1896..2e9445b5 100644 --- a/aixplain/modules/model/__init__.py +++ b/aixplain/modules/model/__init__.py @@ -30,6 +30,7 @@ from urllib.parse import urljoin from aixplain.utils.file_utils import _request_with_retry from typing import Union, Optional, Text, Dict +from datetime import datetime class Model(Asset): @@ -63,6 +64,7 @@ def __init__( function: Optional[Function] = None, is_subscribed: bool = False, cost: Optional[Dict] = None, + created_at: Optional[datetime] = None, input_params: Optional[Dict] = None, output_params: Optional[Dict] = None, **additional_info, @@ -88,8 +90,9 @@ def __init__( self.backend_url = config.BACKEND_URL self.function = 
function self.is_subscribed = is_subscribed - self.input_params = input_params - self.output_params = output_params + self.created_at = created_at + self.input_params = input_params + self.output_params = output_params def to_dict(self) -> Dict: """Get the model info as a Dictionary @@ -98,7 +101,14 @@ def to_dict(self) -> Dict: Dict: Model Information """ clean_additional_info = {k: v for k, v in self.additional_info.items() if v is not None} - return {"id": self.id, "name": self.name, "supplier": self.supplier, "additional_info": clean_additional_info, "input_params": self.input_params,"output_params": self.output_params,} + return { + "id": self.id, + "name": self.name, + "supplier": self.supplier, + "additional_info": clean_additional_info, + "input_params": self.input_params, + "output_params": self.output_params, + } def __repr__(self): try: @@ -263,7 +273,9 @@ def run_async(self, data: Union[Text, Dict], name: Text = "model_process", param error = "Validation-related error: Please ensure all required fields are provided and correctly formatted." else: status_code = str(r.status_code) - error = f"Status {status_code}: Unspecified error: An unspecified error occurred while processing your request." + error = ( + f"Status {status_code}: Unspecified error: An unspecified error occurred while processing your request." + ) response = {"status": "FAILED", "error_message": error} logging.error(f"Error in request for {name} - {r.status_code}: {error}") except Exception: diff --git a/tests/functional/finetune/finetune_functional_test.py b/tests/functional/finetune/finetune_functional_test.py index 7b45613c..46520137 100644 --- a/tests/functional/finetune/finetune_functional_test.py +++ b/tests/functional/finetune/finetune_functional_test.py @@ -1,5 +1,4 @@ __author__ = "lucaspavanelli" - """ Copyright 2022 The aiXplain SDK authors @@ -26,6 +25,7 @@ from aixplain.factories import FinetuneFactory from aixplain.modules.finetune.cost import FinetuneCost from aixplain.enums import Function, Language +from datetime import datetime, timedelta, timezone import pytest @@ -40,11 +40,6 @@ def read_data(data_path): return json.load(open(data_path, "r")) -@pytest.fixture(scope="module", params=read_data(RUN_FILE)) -def run_input_map(request): - return request.param - - @pytest.fixture(scope="module", params=read_data(ESTIMATE_COST_FILE)) def estimate_cost_input_map(request): return request.param @@ -60,11 +55,32 @@ def validate_prompt_input_map(request): return request.param -def test_end2end(run_input_map): - model = ModelFactory.get(run_input_map["model_id"]) - dataset_list = [DatasetFactory.list(query=run_input_map["dataset_name"])["results"][0]] +def pytest_generate_tests(metafunc): + if "input_map" in metafunc.fixturenames: + four_weeks_ago = datetime.now(timezone.utc) - timedelta(weeks=4) + models = ModelFactory.list(function=Function.TEXT_GENERATION, is_finetunable=True)["results"] + + recent_models = [ + { + "model_name": model.name, + "model_id": model.id, + "dataset_name": "Test text generation dataset", + "inference_data": "Hello!", + "required_dev": True, + "search_metadata": False, + } + for model in models + if model.created_at is not None and model.created_at >= four_weeks_ago + ] + recent_models += read_data(RUN_FILE) + metafunc.parametrize("input_map", recent_models) + + +def test_end2end(input_map): + model = input_map["model_id"] + dataset_list = [DatasetFactory.list(query=input_map["dataset_name"])["results"][0]] train_percentage, dev_percentage = 100, 0 - if 
run_input_map["required_dev"]: + if input_map["required_dev"]: train_percentage, dev_percentage = 80, 20 finetune = FinetuneFactory.create( str(uuid.uuid4()), dataset_list, model, train_percentage=train_percentage, dev_percentage=dev_percentage @@ -85,12 +101,12 @@ def test_end2end(run_input_map): assert finetune_model.check_finetune_status().model_status.value == "onboarded" time.sleep(30) print(f"Model dict: {finetune_model.__dict__}") - result = finetune_model.run(run_input_map["inference_data"]) + result = finetune_model.run(input_map["inference_data"]) print(f"Result: {result}") assert result is not None - if run_input_map["search_metadata"]: + if input_map["search_metadata"]: assert "details" in result - assert len(result["details"]) > 0 + assert len(result["details"]) > 0 assert "metadata" in result["details"][0] assert len(result["details"][0]["metadata"]) > 0 finetune_model.delete() From f3d89edc2a2ca7914cc13fa6f27bc0deb92b0465 Mon Sep 17 00:00:00 2001 From: Zaina Abu Shaban Date: Thu, 5 Sep 2024 17:30:34 +0300 Subject: [PATCH 033/105] Added name to update (#245) Co-authored-by: xainaz --- aixplain/modules/pipeline/asset.py | 18 ++++++++++++------ tests/functional/pipelines/create_test.py | 3 ++- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/aixplain/modules/pipeline/asset.py b/aixplain/modules/pipeline/asset.py index 860a08a5..6933f601 100644 --- a/aixplain/modules/pipeline/asset.py +++ b/aixplain/modules/pipeline/asset.py @@ -262,10 +262,11 @@ def __prepare_payload( dasset = DatasetFactory.get(str(data_asset[node_label])) asset_payload["dataAsset"]["dataset_id"] = dasset.id - if ( - len([dfield for dfield in dasset.source_data if dasset.source_data[dfield].id == data[node_label]]) - > 0 - ): + source_data_list = [ + dfield for dfield in dasset.source_data if dasset.source_data[dfield].id == data[node_label] + ] + + if len(source_data_list) > 0: data_found = True else: for target in dasset.target_data: @@ -341,9 +342,11 @@ def run_async( error = "Validation-related error: Please ensure all required fields are provided and correctly formatted." else: status_code = str(r.status_code) - error = f"Status {status_code}: Unspecified error: An unspecified error occurred while processing your request." + error = ( + f"Status {status_code}: Unspecified error: An unspecified error occurred while processing your request." 
+ ) response = {"status": "FAILED", "error_message": error} - logging.error(f"Error in request for {name} - {r.status_code}: {error}") + logging.error(f"Error in request for {name} - {r.status_code}: {error}") except Exception: response = {"status": "FAILED"} if resp is not None: @@ -355,6 +358,7 @@ def update( pipeline: Union[Text, Dict], save_as_asset: bool = False, api_key: Optional[Text] = None, + name: Optional[Text] = None, ): """Update Pipeline @@ -382,6 +386,8 @@ def update( status = "draft" if save_as_asset is True: status = "onboarded" + if name: + self.name = name payload = { "name": self.name, "status": status, diff --git a/tests/functional/pipelines/create_test.py b/tests/functional/pipelines/create_test.py index 6431bd41..6cf3d718 100644 --- a/tests/functional/pipelines/create_test.py +++ b/tests/functional/pipelines/create_test.py @@ -54,7 +54,8 @@ def test_update_pipeline(): pipeline_name = str(uuid4()) pipeline = PipelineFactory.create(name=pipeline_name, pipeline=pipeline_dict) - pipeline.update(pipeline=pipeline_json, save_as_asset=True) + pipeline.update(pipeline=pipeline_json, save_as_asset=True, name="NEW NAME") + assert pipeline.name == "NEW NAME" assert isinstance(pipeline, Pipeline) assert pipeline.id != "" pipeline.delete() From 170030407ca31f71b3208d7f7d993ebb6a0e7e89 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Fri, 6 Sep 2024 15:20:06 -0300 Subject: [PATCH 034/105] ENG-504: Make the agent architecture configurable (#243) * Make the agent architecture configurable * Remove One tool requirement in Agent Creation --- aixplain/factories/agent_factory/__init__.py | 4 ++-- tests/functional/agent/agent_functional_test.py | 5 ++++- tests/unit/agent_test.py | 5 ++++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/aixplain/factories/agent_factory/__init__.py b/aixplain/factories/agent_factory/__init__.py index 134b3560..0c8fc50c 100644 --- a/aixplain/factories/agent_factory/__init__.py +++ b/aixplain/factories/agent_factory/__init__.py @@ -44,7 +44,7 @@ class AgentFactory: def create( cls, name: Text, - llm_id: Text, + llm_id: Text = "669a63646eb56306647e1091", tools: List[Tool] = [], description: Text = "", api_key: Text = config.TEAM_API_KEY, @@ -56,7 +56,7 @@ def create( Args: name (Text): name of the agent - llm_id (Text): aiXplain ID of the large language model to be used as agent. + llm_id (Text, optional): aiXplain ID of the large language model to be used as agent. Defaults to "669a63646eb56306647e1091" (GPT-4o mini). tools (List[Tool], optional): list of tool for the agent. Defaults to []. description (Text, optional): description of the agent role. Defaults to "". api_key (Text, optional): team/user API key. Defaults to config.TEAM_API_KEY. 
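With this change an agent can be created without specifying an LLM or any tools; a minimal sketch relying on the new default (the signature above resolves it to GPT-4o mini, ID 669a63646eb56306647e1091 -- the agent name here is a placeholder):

    from aixplain.factories import AgentFactory

    # llm_id and tools may now both be omitted
    agent = AgentFactory.create(name="Minimal Agent", description="Uses the default LLM")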
diff --git a/tests/functional/agent/agent_functional_test.py b/tests/functional/agent/agent_functional_test.py index 0acdb5be..491977e8 100644 --- a/tests/functional/agent/agent_functional_test.py +++ b/tests/functional/agent/agent_functional_test.py @@ -20,6 +20,7 @@ load_dotenv() from aixplain.factories import AgentFactory +from aixplain.enums.function import Function from aixplain.enums.supplier import Supplier import pytest @@ -79,5 +80,7 @@ def test_list_agents(): def test_fail_non_existent_llm(): with pytest.raises(Exception) as exc_info: - AgentFactory.create(name="Test Agent", llm_id="non_existent_llm", tools=[]) + AgentFactory.create( + name="Test Agent", llm_id="non_existent_llm", tools=[AgentFactory.create_model_tool(function=Function.TRANSLATION)] + ) assert str(exc_info.value) == "Large Language Model with ID 'non_existent_llm' not found." diff --git a/tests/unit/agent_test.py b/tests/unit/agent_test.py index 8a619011..9606a3e5 100644 --- a/tests/unit/agent_test.py +++ b/tests/unit/agent_test.py @@ -69,7 +69,10 @@ def test_sucess_query_content(): def test_invalid_pipelinetool(): with pytest.raises(Exception) as exc_info: AgentFactory.create( - name="Test", tools=[PipelineTool(pipeline="309851793", description="Test")], llm_id="6646261c6eb563165658bbb1" + name="Test", + description="Test Description", + tools=[PipelineTool(pipeline="309851793", description="Test")], + llm_id="6646261c6eb563165658bbb1", ) assert str(exc_info.value) == "Pipeline Tool Unavailable. Make sure Pipeline '309851793' exists or you have access to it." From 357e10da23a23ff063c855a6f2a5d8fd4fe91e7a Mon Sep 17 00:00:00 2001 From: Zaina Abu Shaban Date: Mon, 9 Sep 2024 23:30:14 +0300 Subject: [PATCH 035/105] Eng 544 ai xplain sdk update llm functional tests to cover all new llm models (#248) * Added recent model to LLM test * Added combined models to test * Passing in instantiated model * Passing in instantiated model --------- Co-authored-by: xainaz --- tests/functional/model/run_model_test.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tests/functional/model/run_model_test.py b/tests/functional/model/run_model_test.py index 79979357..47f351bb 100644 --- a/tests/functional/model/run_model_test.py +++ b/tests/functional/model/run_model_test.py @@ -1,20 +1,31 @@ __author__ = "thiagocastroferreira" -import pytest from aixplain.enums import Function from aixplain.factories import ModelFactory from aixplain.modules import LLM +from datetime import datetime, timedelta, timezone + + +def pytest_generate_tests(metafunc): + if "llm_model" in metafunc.fixturenames: + four_weeks_ago = datetime.now(timezone.utc) - timedelta(weeks=4) + models = ModelFactory.list(function=Function.TEXT_GENERATION)["results"] + + predefined_models = ["Groq Llama 3 70B", "Chat GPT 3.5", "GPT-4o", "GPT 4 (32k)"] + recent_models = [model for model in models if model.created_at and model.created_at >= four_weeks_ago] + combined_models = recent_models + [ + ModelFactory.list(query=model, function=Function.TEXT_GENERATION)["results"][0] for model in predefined_models + ] + metafunc.parametrize("llm_model", combined_models) -@pytest.mark.parametrize("llm_model", ["Groq Llama 3 70B", "Chat GPT 3.5", "GPT-4o", "GPT 4 (32k)"]) def test_llm_run(llm_model): """Testing LLMs with history context""" - model = ModelFactory.list(query=llm_model, function=Function.TEXT_GENERATION)["results"][0] - assert isinstance(model, LLM) + assert isinstance(llm_model, LLM) - response = model.run( + response = llm_model.run( 
data="What is my name?", history=[{"role": "user", "content": "Hello! My name is Thiago."}, {"role": "assistant", "content": "Hello!"}], ) From 731a150678c34083d97e6246f84c8a3c21acb390 Mon Sep 17 00:00:00 2001 From: kadirpekel Date: Tue, 10 Sep 2024 20:23:49 +0200 Subject: [PATCH 036/105] Eng 399 - Introducing Metric Nodes in Designer (#247) * implemented designer metric node * function test for designer metric node * Metric node populated on fetch * designer node construction fix * minor * minor bug fix * Fixing data asset reference in metric pipeline test * minor bug fixes * Remove undesired print --------- Co-authored-by: Thiago Castro Ferreira --- aixplain/factories/pipeline_factory/utils.py | 9 ++- .../modules/pipeline/designer/__init__.py | 6 ++ aixplain/modules/pipeline/designer/base.py | 9 ++- aixplain/modules/pipeline/designer/enums.py | 1 + aixplain/modules/pipeline/designer/nodes.py | 72 ++++++++++++++++--- .../modules/pipeline/designer/pipeline.py | 12 ++++ aixplain/modules/pipeline/generate.py | 3 + aixplain/modules/pipeline/pipeline.py | 17 ++--- tests/functional/pipelines/designer_test.py | 51 ++++++++++++- 9 files changed, 155 insertions(+), 25 deletions(-) diff --git a/aixplain/factories/pipeline_factory/utils.py b/aixplain/factories/pipeline_factory/utils.py index 465e5e7f..aba93ef9 100644 --- a/aixplain/factories/pipeline_factory/utils.py +++ b/aixplain/factories/pipeline_factory/utils.py @@ -7,7 +7,8 @@ from aixplain.modules.pipeline.designer import ( Input, Output, - AssetNode, + BareAsset, + BareMetric, Decision, Router, Route, @@ -36,14 +37,16 @@ def build_from_response(response: Dict, load_architecture: bool = False) -> Pipe try: # instantiating nodes for node_json in response["nodes"]: - print(node_json) if node_json["type"].lower() == "input": node = Input( data=node_json["data"] if "data" in node_json else None, data_types=[DataType(dt) for dt in node_json["dataType"]], ) elif node_json["type"].lower() == "asset": - node = AssetNode(asset_id=node_json["assetId"]) + if node_json["functionType"] == "metric": + node = BareMetric(asset_id=node_json["assetId"]) + else: + node = BareAsset(asset_id=node_json["assetId"]) elif node_json["type"].lower() == "segmentor": raise NotImplementedError() elif node_json["type"].lower() == "reconstructor": diff --git a/aixplain/modules/pipeline/designer/__init__.py b/aixplain/modules/pipeline/designer/__init__.py index 0bb56542..81571f21 100644 --- a/aixplain/modules/pipeline/designer/__init__.py +++ b/aixplain/modules/pipeline/designer/__init__.py @@ -8,6 +8,9 @@ Router, BaseReconstructor, BaseSegmentor, + BaseMetric, + BareAsset, + BareMetric ) from .pipeline import DesignerPipeline from .base import ( @@ -36,6 +39,7 @@ __all__ = [ "DesignerPipeline", "AssetNode", + "BareAsset", "Decision", "Script", "Input", @@ -63,4 +67,6 @@ "ParamProxy", "TI", "TO", + "BaseMetric", + "BareMetric" ] diff --git a/aixplain/modules/pipeline/designer/base.py b/aixplain/modules/pipeline/designer/base.py index 8bea73d6..76e6196d 100644 --- a/aixplain/modules/pipeline/designer/base.py +++ b/aixplain/modules/pipeline/designer/base.py @@ -188,8 +188,8 @@ def validate(self): if from_param.data_type and to_param.data_type: if from_param.data_type != to_param.data_type: raise ValueError( - f"Data type mismatch between {from_param.data_type} and {to_param.data_type}" - ) # noqa + f"Data type mismatch between {from_param.data_type} and {to_param.data_type}" # noqa + ) def attach_to(self, pipeline: "DesignerPipeline"): """ @@ -344,6 +344,9 @@ def __init__( if 
pipeline: self.attach_to(pipeline) + def build_label(self): + return f"{self.type.value}(ID={self.number})" + def attach_to(self, pipeline: "DesignerPipeline"): """ Attach the node to the pipeline. @@ -359,7 +362,7 @@ def attach_to(self, pipeline: "DesignerPipeline"): if self.number is None: self.number = len(pipeline.nodes) if self.label is None: - self.label = f"{self.type.value}(ID={self.number})" + self.label = self.build_label() assert not pipeline.get_node(self.number), "Node number already exists" pipeline.nodes.append(self) diff --git a/aixplain/modules/pipeline/designer/enums.py b/aixplain/modules/pipeline/designer/enums.py index 4c044dba..a3a07a40 100644 --- a/aixplain/modules/pipeline/designer/enums.py +++ b/aixplain/modules/pipeline/designer/enums.py @@ -36,6 +36,7 @@ class FunctionType(str, Enum): AI = "AI" SEGMENTOR = "SEGMENTOR" RECONSTRUCTOR = "RECONSTRUCTOR" + METRIC = "METRIC" class ParamType: diff --git a/aixplain/modules/pipeline/designer/nodes.py b/aixplain/modules/pipeline/designer/nodes.py index 22152239..eb8fecc5 100644 --- a/aixplain/modules/pipeline/designer/nodes.py +++ b/aixplain/modules/pipeline/designer/nodes.py @@ -55,8 +55,9 @@ def __init__( supplier: str = None, version: str = None, pipeline: "DesignerPipeline" = None, + **kwargs ): - super().__init__(pipeline=pipeline) + super().__init__(pipeline=pipeline, **kwargs) self.asset_id = asset_id self.supplier = supplier self.version = version @@ -85,8 +86,8 @@ def populate_asset(self): if self.function: if self.asset.function.value != self.function: raise ValueError( - f"Function {self.function} is not supported by asset {self.asset_id}" - ) # noqa + f"Function {self.function} is not supported by asset {self.asset_id}" # noqa + ) else: self.function = self.asset.function.value self._auto_populate_params() @@ -129,6 +130,18 @@ def serialize(self) -> dict: return obj +class BareAssetInputs(Inputs): + pass + + +class BareAssetOutputs(Outputs): + pass + + +class BareAsset(AssetNode[BareAssetInputs, BareAssetOutputs]): + pass + + class InputInputs(Inputs): pass @@ -163,10 +176,11 @@ def __init__( data: Optional[str] = None, data_types: Optional[List[DataType]] = None, pipeline: "DesignerPipeline" = None, + **kwargs ): from aixplain.factories.file_factory import FileFactory - super().__init__(pipeline=pipeline) + super().__init__(pipeline=pipeline, **kwargs) self.data_types = data_types or [] self.data = data @@ -209,8 +223,9 @@ def __init__( self, data_types: Optional[List[DataType]] = None, pipeline: "DesignerPipeline" = None, + **kwargs ): - super().__init__(pipeline=pipeline) + super().__init__(pipeline=pipeline, **kwargs) self.data_types = data_types or [] def serialize(self) -> dict: @@ -237,10 +252,11 @@ def __init__( pipeline: "DesignerPipeline" = None, script_path: Optional[str] = None, fileId: Optional[str] = None, + **kwargs ): from aixplain.factories.script_factory import ScriptFactory - super().__init__(pipeline=pipeline) + super().__init__(pipeline=pipeline, **kwargs) assert script_path or fileId, "script_path or fileId is required" @@ -272,6 +288,7 @@ def __init__( path: List[Union[Node, int]], operation: Operation, type: RouteType, + **kwargs ): """ Post init method to convert the nodes to node numbers if they are @@ -328,9 +345,10 @@ class Router(Node[RouterInputs, RouterOutputs], LinkableMixin): outputs_class: Type[TO] = RouterOutputs def __init__( - self, routes: List[Route], pipeline: "DesignerPipeline" = None + self, routes: List[Route], pipeline: "DesignerPipeline" = None, + **kwargs ): - 
super().__init__(pipeline=pipeline) + super().__init__(pipeline=pipeline, **kwargs) self.routes = routes def serialize(self) -> dict: @@ -369,9 +387,10 @@ class Decision(Node[DecisionInputs, DecisionOutputs], LinkableMixin): outputs_class: Type[TO] = DecisionOutputs def __init__( - self, routes: List[Route], pipeline: "DesignerPipeline" = None + self, routes: List[Route], pipeline: "DesignerPipeline" = None, + **kwargs ): - super().__init__(pipeline=pipeline) + super().__init__(pipeline=pipeline, **kwargs) self.routes = routes def link( @@ -462,3 +481,36 @@ class BareReconstructor( functionType: FunctionType = FunctionType.RECONSTRUCTOR inputs_class: Type[TI] = ReconstructorInputs outputs_class: Type[TO] = ReconstructorOutputs + + +class BaseMetric(AssetNode[TI, TO]): + functionType: FunctionType = FunctionType.METRIC + + def build_label(self): + return f"METRIC({self.number})" + + +class MetricInputs(Inputs): + + hypotheses: InputParam = None + references: InputParam = None + sources: InputParam = None + + def __init__(self, node: Node): + super().__init__(node) + self.hypotheses = self.create_param("hypotheses") + self.references = self.create_param("references") + self.sources = self.create_param("sources") + + +class MetricOutputs(Outputs): + + data: OutputParam = None + + def __init__(self, node: Node): + super().__init__(node) + self.data = self.create_param("data") + + +class BareMetric(BaseMetric[MetricInputs, MetricOutputs]): + pass diff --git a/aixplain/modules/pipeline/designer/pipeline.py b/aixplain/modules/pipeline/designer/pipeline.py index 5304d202..b2ebd19b 100644 --- a/aixplain/modules/pipeline/designer/pipeline.py +++ b/aixplain/modules/pipeline/designer/pipeline.py @@ -13,6 +13,7 @@ Route, BareReconstructor, BareSegmentor, + BareMetric ) from .enums import NodeType, RouteType, Operation @@ -326,3 +327,14 @@ def bare_segmentor(self, *args, **kwargs) -> BareSegmentor: :return: the node """ return BareSegmentor(*args, pipeline=self, **kwargs) + + def metric(self, *args, **kwargs) -> BareMetric: + """ + Shortcut to create an metric node for the current pipeline. + All params will be passed as keyword arguments to the node + constructor. 
+ + :param kwargs: keyword arguments + :return: the node + """ + return BareMetric(*args, pipeline=self, **kwargs) diff --git a/aixplain/modules/pipeline/generate.py b/aixplain/modules/pipeline/generate.py index c71e8ae6..a64917c1 100644 --- a/aixplain/modules/pipeline/generate.py +++ b/aixplain/modules/pipeline/generate.py @@ -31,6 +31,7 @@ AssetNode, BaseReconstructor, BaseSegmentor, + BaseMetric ) from .default import DefaultPipeline from aixplain.modules import asset @@ -160,6 +161,8 @@ def populate_specs(functions: list): base_class = "BaseSegmentor" elif is_reconstructor: base_class = "BaseReconstructor" + elif "metric" in function_name.split("_"): # noqa: Advise a better distinguisher please + base_class = "BaseMetric" spec = { "id": function["id"], diff --git a/aixplain/modules/pipeline/pipeline.py b/aixplain/modules/pipeline/pipeline.py index 36bc643d..e5675e4b 100644 --- a/aixplain/modules/pipeline/pipeline.py +++ b/aixplain/modules/pipeline/pipeline.py @@ -14,6 +14,7 @@ AssetNode, BaseReconstructor, BaseSegmentor, + BaseMetric ) from .default import DefaultPipeline from aixplain.modules import asset @@ -907,7 +908,7 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class ReferencelessAudioGenerationMetric(AssetNode[ReferencelessAudioGenerationMetricInputs, ReferencelessAudioGenerationMetricOutputs]): +class ReferencelessAudioGenerationMetric(BaseMetric[ReferencelessAudioGenerationMetricInputs, ReferencelessAudioGenerationMetricOutputs]): """ The Referenceless Audio Generation Metric is a tool designed to evaluate the quality of generated audio content without the need for a reference or original @@ -1080,7 +1081,7 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class AudioGenerationMetric(AssetNode[AudioGenerationMetricInputs, AudioGenerationMetricOutputs]): +class AudioGenerationMetric(BaseMetric[AudioGenerationMetricInputs, AudioGenerationMetricOutputs]): """ The Audio Generation Metric is a quantitative measure used to evaluate the quality, accuracy, and overall performance of audio generated by artificial @@ -1471,7 +1472,7 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class MetricAggregation(AssetNode[MetricAggregationInputs, MetricAggregationOutputs]): +class MetricAggregation(BaseMetric[MetricAggregationInputs, MetricAggregationOutputs]): """ Metric Aggregation is a function that computes and summarizes numerical data by applying statistical operations, such as averaging, summing, or finding the @@ -1790,7 +1791,7 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class ReferencelessTextGenerationMetric(AssetNode[ReferencelessTextGenerationMetricInputs, ReferencelessTextGenerationMetricOutputs]): +class ReferencelessTextGenerationMetric(BaseMetric[ReferencelessTextGenerationMetricInputs, ReferencelessTextGenerationMetricOutputs]): """ The Referenceless Text Generation Metric is a method for evaluating the quality of generated text without requiring a reference text for comparison, often @@ -1830,7 +1831,7 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class TextGenerationMetricDefault(AssetNode[TextGenerationMetricDefaultInputs, TextGenerationMetricDefaultOutputs]): +class TextGenerationMetricDefault(BaseMetric[TextGenerationMetricDefaultInputs, TextGenerationMetricDefaultOutputs]): """ The "Text 
Generation Metric Default" function provides a standard set of evaluation metrics for assessing the quality and performance of text generation @@ -2130,7 +2131,7 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class TextGenerationMetric(AssetNode[TextGenerationMetricInputs, TextGenerationMetricOutputs]): +class TextGenerationMetric(BaseMetric[TextGenerationMetricInputs, TextGenerationMetricOutputs]): """ A Text Generation Metric is a quantitative measure used to evaluate the quality and effectiveness of text produced by natural language processing models, often @@ -2981,7 +2982,7 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class ReferencelessTextGenerationMetricDefault(AssetNode[ReferencelessTextGenerationMetricDefaultInputs, ReferencelessTextGenerationMetricDefaultOutputs]): +class ReferencelessTextGenerationMetricDefault(BaseMetric[ReferencelessTextGenerationMetricDefaultInputs, ReferencelessTextGenerationMetricDefaultOutputs]): """ The Referenceless Text Generation Metric Default is a function designed to evaluate the quality of generated text without relying on reference texts for @@ -3665,7 +3666,7 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.NUMBER) -class ClassificationMetric(AssetNode[ClassificationMetricInputs, ClassificationMetricOutputs]): +class ClassificationMetric(BaseMetric[ClassificationMetricInputs, ClassificationMetricOutputs]): """ A Classification Metric is a quantitative measure used to evaluate the quality and effectiveness of classification models. diff --git a/tests/functional/pipelines/designer_test.py b/tests/functional/pipelines/designer_test.py index 62f42f7e..f2ae92d5 100644 --- a/tests/functional/pipelines/designer_test.py +++ b/tests/functional/pipelines/designer_test.py @@ -1,7 +1,7 @@ import pytest from aixplain.enums import DataType -from aixplain.factories import PipelineFactory +from aixplain.factories import PipelineFactory, DatasetFactory from aixplain.modules.pipeline.designer import ( Link, Operation, @@ -246,3 +246,52 @@ def test_reconstructing_pipeline(pipeline): assert len(output["data"]) > 0 assert output["data"][0].get("segments") is not None assert len(output["data"][0]["segments"]) > 0 + + +def test_metric_pipeline(pipeline): + + dataset = DatasetFactory.list(query="for_functional_tests")["results"][0] + data_asset_id = dataset.id + reference_id = dataset.target_data["pt"][0].id + + # Instantiate input nodes + text_input_node = pipeline.input(label="TextInput") + reference_input_node = pipeline.input(label="ReferenceInput") + + # Instantiate the metric node + translation_metric_node = pipeline\ + .text_generation_metric(asset_id='639874ab506c987b1ae1acc6') + + # Instantiate output node + score_output_node = pipeline.output() + + # Link the nodes + text_input_node.link(translation_metric_node, + from_param='input', + to_param='hypotheses') + + reference_input_node.link(translation_metric_node, + from_param='input', + to_param='references') + + translation_metric_node.link(score_output_node, + from_param='data', + to_param='output') + + translation_metric_node.inputs.score_identifier = "bleu" + + # Save and run the pipeline + pipeline.save() + + output = pipeline.run(data={ + "TextInput": reference_id, "ReferenceInput": reference_id + }, data_asset={ + "TextInput": data_asset_id, "ReferenceInput": data_asset_id + } + ) + + assert output["status"] == "SUCCESS" + assert 
output.get("data") is not None + assert len(output["data"]) > 0 + assert output["data"][0].get("segments") is not None + assert len(output["data"][0]["segments"]) > 0 From 0e62774c9ab8eb20cb6f47240e9e70a5e86b38f2 Mon Sep 17 00:00:00 2001 From: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Date: Thu, 12 Sep 2024 12:51:36 -0300 Subject: [PATCH 037/105] Add TeamAgent factory and module. Fix typos in code comments (#227) * Add Community factory and module. Fix typos in code comments * Update ModelTool code comments * Rename Community to TeamAgent and add correct endpoints * Adding Team Agent creation function test * Update TeamAgent methods * Remove special characters from agent names --------- Co-authored-by: Thiago Castro Ferreira --- aixplain/factories/__init__.py | 1 + aixplain/factories/agent_factory/__init__.py | 20 +- aixplain/factories/agent_factory/utils.py | 4 +- .../factories/team_agent_factory/__init__.py | 165 ++++++++++++ .../factories/team_agent_factory/utils.py | 34 +++ aixplain/modules/__init__.py | 1 + aixplain/modules/agent/__init__.py | 17 +- aixplain/modules/agent/tool/__init__.py | 2 +- aixplain/modules/agent/tool/model_tool.py | 17 +- aixplain/modules/agent/tool/pipeline_tool.py | 2 +- aixplain/modules/team_agent/__init__.py | 246 ++++++++++++++++++ .../data_onboarding/onboard_functions.py | 10 +- .../functional/agent/agent_functional_test.py | 9 +- .../agent/data/agent_test_end2end.json | 2 +- .../data/team_agent_test_end2end.json | 32 +++ .../team_agent/team_agent_functional_test.py | 94 +++++++ tests/unit/agent_test.py | 12 +- tests/unit/team_agent_test.py | 73 ++++++ 18 files changed, 693 insertions(+), 48 deletions(-) create mode 100644 aixplain/factories/team_agent_factory/__init__.py create mode 100644 aixplain/factories/team_agent_factory/utils.py create mode 100644 aixplain/modules/team_agent/__init__.py create mode 100644 tests/functional/team_agent/data/team_agent_test_end2end.json create mode 100644 tests/functional/team_agent/team_agent_functional_test.py create mode 100644 tests/unit/team_agent_test.py diff --git a/aixplain/factories/__init__.py b/aixplain/factories/__init__.py index 70361e77..d540f374 100644 --- a/aixplain/factories/__init__.py +++ b/aixplain/factories/__init__.py @@ -21,6 +21,7 @@ """ from .asset_factory import AssetFactory from .agent_factory import AgentFactory +from .team_agent_factory import TeamAgentFactory from .benchmark_factory import BenchmarkFactory from .corpus_factory import CorpusFactory from .data_factory import DataFactory diff --git a/aixplain/factories/agent_factory/__init__.py b/aixplain/factories/agent_factory/__init__.py index 0c8fc50c..440219dd 100644 --- a/aixplain/factories/agent_factory/__init__.py +++ b/aixplain/factories/agent_factory/__init__.py @@ -44,25 +44,23 @@ class AgentFactory: def create( cls, name: Text, + description: Text, llm_id: Text = "669a63646eb56306647e1091", tools: List[Tool] = [], - description: Text = "", api_key: Text = config.TEAM_API_KEY, supplier: Union[Dict, Text, Supplier, int] = "aiXplain", version: Optional[Text] = None, - use_mentalist_and_inspector: bool = False, ) -> Agent: """Create a new agent in the platform. Args: name (Text): name of the agent + description (Text): description of the agent role. llm_id (Text, optional): aiXplain ID of the large language model to be used as agent. Defaults to "669a63646eb56306647e1091" (GPT-4o mini). tools (List[Tool], optional): list of tool for the agent. Defaults to []. 
- description (Text, optional): description of the agent role. Defaults to "". api_key (Text, optional): team/user API key. Defaults to config.TEAM_API_KEY. supplier (Union[Dict, Text, Supplier, int], optional): owner of the agent. Defaults to "aiXplain". version (Optional[Text], optional): version of the agent. Defaults to None. - use_mentalist_and_inspector (bool, optional): flag to enable mentalist and inspector agents (which only works when a supervisor is enabled). Defaults to False. Returns: Agent: created Agent @@ -70,10 +68,6 @@ def create( # validate LLM ID validate_llm(llm_id) - orchestrator_llm_id, mentalist_and_inspector_llm_id = llm_id, None - if use_mentalist_and_inspector is True: - mentalist_and_inspector_llm_id = llm_id - try: agent = None url = urljoin(config.BACKEND_URL, "sdk/agents") @@ -117,8 +111,6 @@ def create( "supplier": supplier, "version": version, "llmId": llm_id, - "supervisorId": orchestrator_llm_id, - "plannerId": mentalist_and_inspector_llm_id, } logging.info(f"Start service for POST Create Agent - {url} - {headers} - {json.dumps(payload)}") @@ -128,13 +120,13 @@ def create( agent = build_agent(payload=response, api_key=api_key) else: error = r.json() - error_msg = "Agent Onboarding Error: Please contant the administrators." + error_msg = "Agent Onboarding Error: Please contact the administrators." if "message" in error: msg = error["message"] if error["message"] == "err.name_already_exists": msg = "Agent name already exists." elif error["message"] == "err.asset_is_not_available": - msg = "Some the tools are not available." + msg = "Some tools are not available." error_msg = f"Agent Onboarding Error (HTTP {r.status_code}): {msg}" logging.exception(error_msg) raise Exception(error_msg) @@ -190,7 +182,7 @@ def list(cls) -> Dict: agents.append(build_agent(agent)) return {"results": agents, "page_total": page_total, "page_number": 0, "total": total} else: - error_msg = "Agent Listing Error: Please contant the administrators." + error_msg = "Agent Listing Error: Please contact the administrators." if "message" in resp: msg = resp["message"] error_msg = f"Agent Listing Error (HTTP {r.status_code}): {msg}" @@ -214,7 +206,7 @@ def get(cls, agent_id: Text, api_key: Optional[Text] = None) -> Agent: if 200 <= r.status_code < 300: return build_agent(resp) else: - msg = "Please contant the administrators." + msg = "Please contact the administrators." 
if "message" in resp: msg = resp["message"] error_msg = f"Agent Get Error (HTTP {r.status_code}): {msg}" diff --git a/aixplain/factories/agent_factory/utils.py b/aixplain/factories/agent_factory/utils.py index 6aed75ae..4a48a3b9 100644 --- a/aixplain/factories/agent_factory/utils.py +++ b/aixplain/factories/agent_factory/utils.py @@ -7,6 +7,8 @@ from typing import Dict, Text from urllib.parse import urljoin +GPT_4o_ID = "6646261c6eb563165658bbb1" + def build_agent(payload: Dict, api_key: Text = config.TEAM_API_KEY) -> Agent: """Instantiate a new agent in the platform.""" @@ -41,7 +43,7 @@ def build_agent(payload: Dict, api_key: Text = config.TEAM_API_KEY) -> Agent: supplier=payload["teamId"] if "teamId" in payload else None, version=payload["version"] if "version" in payload else None, cost=payload["cost"] if "cost" in payload else None, - llm_id=payload["llmId"] if "llmId" in payload else "6646261c6eb563165658bbb1", + llm_id=payload["llmId"] if "llmId" in payload else GPT_4o_ID, api_key=api_key, status=AssetStatus(payload["status"]), ) diff --git a/aixplain/factories/team_agent_factory/__init__.py b/aixplain/factories/team_agent_factory/__init__.py new file mode 100644 index 00000000..9baf3cf0 --- /dev/null +++ b/aixplain/factories/team_agent_factory/__init__.py @@ -0,0 +1,165 @@ +__author__ = "lucaspavanelli" + +""" +Copyright 2024 The aiXplain SDK authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Author: Thiago Castro Ferreira and Lucas Pavanelli +Date: August 15th 2024 +Description: + TeamAgent Factory Class +""" + +import json +import logging + +from aixplain.enums.supplier import Supplier +from aixplain.factories.agent_factory import AgentFactory +from aixplain.factories.agent_factory.utils import validate_llm +from aixplain.modules.agent import Agent +from aixplain.modules.team_agent import TeamAgent +from aixplain.utils import config +from aixplain.factories.team_agent_factory.utils import build_team_agent +from aixplain.utils.file_utils import _request_with_retry +from typing import Dict, List, Optional, Text, Union +from urllib.parse import urljoin + + +class TeamAgentFactory: + @classmethod + def create( + cls, + name: Text, + agents: List[Union[Text, Agent]], + llm_id: Text = "669a63646eb56306647e1091", + description: Text = "", + api_key: Text = config.TEAM_API_KEY, + supplier: Union[Dict, Text, Supplier, int] = "aiXplain", + version: Optional[Text] = None, + use_mentalist_and_inspector: bool = True, + ) -> TeamAgent: + """Create a new team agent in the platform.""" + # validate LLM ID + validate_llm(llm_id) + assert len(agents) > 0, "TeamAgent Onboarding Error: At least one agent must be provided." 
+ for agent in agents: + if isinstance(agent, Text) is True: + try: + agent = AgentFactory.get(agent) + except Exception: + raise Exception(f"TeamAgent Onboarding Error: Agent {agent} does not exist.") + else: + assert isinstance(agent, Agent), "TeamAgent Onboarding Error: Agents must be instances of Agent class" + + mentalist_and_inspector_llm_id = None + if use_mentalist_and_inspector is True: + mentalist_and_inspector_llm_id = llm_id + try: + team_agent = None + url = urljoin(config.BACKEND_URL, "sdk/agent-communities") + headers = {"x-api-key": api_key} + + if isinstance(supplier, dict): + supplier = supplier["code"] + elif isinstance(supplier, Supplier): + supplier = supplier.value["code"] + + agent_list = [] + for idx, agent in enumerate(agents): + agent_list.append({"assetId": agent.id, "number": idx, "type": "AGENT", "label": "AGENT"}) + + payload = { + "name": name, + "agents": agent_list, + "links": [], + "description": description, + "llmId": llm_id, + "supervisorId": llm_id, + "plannerId": mentalist_and_inspector_llm_id, + "supplier": supplier, + "version": version, + } + + logging.info(f"Start service for POST Create TeamAgent - {url} - {headers} - {json.dumps(payload)}") + r = _request_with_retry("post", url, headers=headers, json=payload) + if 200 <= r.status_code < 300: + response = r.json() + team_agent = build_team_agent(payload=response, api_key=api_key) + else: + error = r.json() + error_msg = "TeamAgent Onboarding Error: Please contact the administrators." + if "message" in error: + msg = error["message"] + if error["message"] == "err.name_already_exists": + msg = "TeamAgent name already exists." + elif error["message"] == "err.asset_is_not_available": + msg = "Some tools are not available." + error_msg = f"TeamAgent Onboarding Error (HTTP {r.status_code}): {msg}" + logging.exception(error_msg) + raise Exception(error_msg) + except Exception as e: + raise Exception(e) + return team_agent + + @classmethod + def list(cls) -> Dict: + """List all agents available in the platform.""" + url = urljoin(config.BACKEND_URL, "sdk/agent-communities") + headers = {"x-api-key": config.TEAM_API_KEY, "Content-Type": "application/json"} + + payload = {} + logging.info(f"Start service for GET List Agents - {url} - {headers} - {json.dumps(payload)}") + try: + r = _request_with_retry("get", url, headers=headers) + resp = r.json() + + if 200 <= r.status_code < 300: + agents, page_total, total = [], 0, 0 + results = resp + page_total = len(results) + total = len(results) + logging.info(f"Response for GET List Agents - Page Total: {page_total} / Total: {total}") + for agent in results: + agents.append(build_team_agent(agent)) + return {"results": agents, "page_total": page_total, "page_number": 0, "total": total} + else: + error_msg = "Agent Listing Error: Please contact the administrators." 
+ if "message" in resp: + msg = resp["message"] + error_msg = f"Agent Listing Error (HTTP {r.status_code}): {msg}" + logging.exception(error_msg) + raise Exception(error_msg) + except Exception as e: + raise Exception(e) + + @classmethod + def get(cls, agent_id: Text, api_key: Optional[Text] = None) -> Agent: + """Get agent by id.""" + url = urljoin(config.BACKEND_URL, f"sdk/agent-communities/{agent_id}") + if config.AIXPLAIN_API_KEY != "": + headers = {"x-aixplain-key": f"{config.AIXPLAIN_API_KEY}", "Content-Type": "application/json"} + else: + api_key = api_key if api_key is not None else config.TEAM_API_KEY + headers = {"x-api-key": api_key, "Content-Type": "application/json"} + logging.info(f"Start service for GET Agent - {url} - {headers}") + r = _request_with_retry("get", url, headers=headers) + resp = r.json() + if 200 <= r.status_code < 300: + return build_team_agent(resp) + else: + msg = "Please contact the administrators." + if "message" in resp: + msg = resp["message"] + error_msg = f"Agent Get Error (HTTP {r.status_code}): {msg}" + raise Exception(error_msg) diff --git a/aixplain/factories/team_agent_factory/utils.py b/aixplain/factories/team_agent_factory/utils.py new file mode 100644 index 00000000..42fa5f6c --- /dev/null +++ b/aixplain/factories/team_agent_factory/utils.py @@ -0,0 +1,34 @@ +__author__ = "lucaspavanelli" + +import aixplain.utils.config as config +from aixplain.enums.asset_status import AssetStatus +from aixplain.modules.team_agent import TeamAgent +from aixplain.factories.agent_factory import AgentFactory +from typing import Dict, Text +from urllib.parse import urljoin + +GPT_4o_ID = "6646261c6eb563165658bbb1" + + +def build_team_agent(payload: Dict, api_key: Text = config.TEAM_API_KEY) -> TeamAgent: + """Instantiate a new team agent in the platform.""" + agents = payload["agents"] + for i, agent in enumerate(agents): + agent = AgentFactory.get(agent["assetId"]) + agents[i] = agent + + team_agent = TeamAgent( + id=payload["id"], + name=payload["name"] if "name" in payload else "", + agents=agents, + description=payload["description"] if "description" in payload else "", + supplier=payload["teamId"] if "teamId" in payload else None, + version=payload["version"] if "version" in payload else None, + cost=payload["cost"] if "cost" in payload else None, + llm_id=payload["llmId"] if "llmId" in payload else GPT_4o_ID, + use_mentalist_and_inspector=True if "plannerId" in payload and payload["plannerId"] is not None else False, + api_key=api_key, + status=AssetStatus(payload["status"]), + ) + team_agent.url = urljoin(config.BACKEND_URL, f"sdk/agent-communities/{team_agent.id}/run") + return team_agent diff --git a/aixplain/modules/__init__.py b/aixplain/modules/__init__.py index c7246dac..bad0c225 100644 --- a/aixplain/modules/__init__.py +++ b/aixplain/modules/__init__.py @@ -35,3 +35,4 @@ from .benchmark_job import BenchmarkJob from .agent import Agent from .agent.tool import Tool +from .team_agent import TeamAgent diff --git a/aixplain/modules/agent/__init__.py b/aixplain/modules/agent/__init__.py index c0604f6a..546ea4d8 100644 --- a/aixplain/modules/agent/__init__.py +++ b/aixplain/modules/agent/__init__.py @@ -47,7 +47,7 @@ class Agent(Model): name (Text): Name of the Agent tools (List[Tool]): List of tools that the Agent uses. description (Text, optional): description of the Agent. Defaults to "". - llm_id (Text, optional): large language model. Defaults to GPT-4o (6646261c6eb563165658bbb1). + llm_id (Text): large language model. 
Defaults to GPT-4o (6646261c6eb563165658bbb1). supplier (Text): Supplier of the Agent. version (Text): Version of the Agent. backend_url (str): URL of the backend. @@ -59,8 +59,8 @@ def __init__( self, id: Text, name: Text, + description: Text, tools: List[Tool] = [], - description: Text = "", llm_id: Text = "6646261c6eb563165658bbb1", api_key: Optional[Text] = config.TEAM_API_KEY, supplier: Union[Dict, Text, Supplier, int] = "aiXplain", @@ -69,13 +69,13 @@ def __init__( status: AssetStatus = AssetStatus.ONBOARDING, **additional_info, ) -> None: - """Create a FineTune with the necessary information. + """Create an Agent with the necessary information. Args: id (Text): ID of the Agent name (Text): Name of the Agent + description (Text): description of the Agent. tools (List[Tool]): List of tools that the Agent uses. - description (Text, optional): description of the Agent. Defaults to "". llm_id (Text, optional): large language model. Defaults to GPT-4o (6646261c6eb563165658bbb1). supplier (Text): Supplier of the Agent. version (Text): Version of the Agent. @@ -83,7 +83,6 @@ def __init__( api_key (str): The TEAM API key used for authentication. cost (Dict, optional): model price. Defaults to None. """ - # assert len(tools) > 0, "At least one tool must be provided." super().__init__(id, name, description, api_key, supplier, version, cost=cost) self.additional_info = additional_info self.tools = tools @@ -144,7 +143,7 @@ def run( return response except Exception as e: msg = f"Error in request for {name} - {traceback.format_exc()}" - logging.error(f"Model Run: Error in running for {name}: {e}") + logging.error(f"Agent Run: Error in running for {name}: {e}") end = time.time() return {"status": "FAILED", "error": msg, "elapsed_time": end - start} @@ -210,7 +209,7 @@ def run_async( payload = json.dumps(payload) r = _request_with_retry("post", self.url, headers=headers, data=payload) - logging.info(f"Model Run Async: Start service for {name} - {self.url} - {payload} - {headers}") + logging.info(f"Agent Run Async: Start service for {name} - {self.url} - {payload} - {headers}") resp = None try: @@ -222,13 +221,13 @@ def run_async( except Exception: response = {"status": "FAILED"} msg = f"Error in request for {name} - {traceback.format_exc()}" - logging.error(f"Model Run Async: Error in running for {name}: {resp}") + logging.error(f"Agent Run Async: Error in running for {name}: {resp}") if resp is not None: response["error"] = msg return response def delete(self) -> None: - """Delete Corpus service""" + """Delete Agent service""" try: url = urljoin(config.BACKEND_URL, f"sdk/agents/{self.id}") headers = {"x-api-key": config.TEAM_API_KEY, "Content-Type": "application/json"} diff --git a/aixplain/modules/agent/tool/__init__.py b/aixplain/modules/agent/tool/__init__.py index 2a22511a..9c7a7a09 100644 --- a/aixplain/modules/agent/tool/__init__.py +++ b/aixplain/modules/agent/tool/__init__.py @@ -29,7 +29,7 @@ class Tool(ABC): Attributes: name (Text): name of the tool - description (Text): descriptiion of the tool + description (Text): description of the tool version (Text): version of the tool """ diff --git a/aixplain/modules/agent/tool/model_tool.py b/aixplain/modules/agent/tool/model_tool.py index c88f1ee0..3a84c45b 100644 --- a/aixplain/modules/agent/tool/model_tool.py +++ b/aixplain/modules/agent/tool/model_tool.py @@ -20,7 +20,7 @@ Description: Agentification Class """ -from typing import Optional, Union, Text +from typing import Optional, Union, Text, Dict from aixplain.enums.function import 
Function from aixplain.enums.supplier import Supplier @@ -32,23 +32,24 @@ class ModelTool(Tool): """Specialized software or resource designed to assist the AI in executing specific tasks or functions based on user commands. Attributes: - function (Function): task that the tool performs - supplier (Optional[Union[Dict, Text, Supplier, int]], optional): Preferred supplier to perform the task. Defaults to None. + function (Optional[Union[Function, Text]]): task that the tool performs. + supplier (Optional[Union[Dict, Supplier]]): Preferred supplier to perform the task. + model (Optional[Union[Text, Model]]): Model function. """ def __init__( self, - function: Optional[Function] = None, - supplier: Optional[Supplier] = None, + function: Optional[Union[Function, Text]] = None, + supplier: Optional[Union[Dict, Supplier]] = None, model: Optional[Union[Text, Model]] = None, **additional_info, ) -> None: """Specialized software or resource designed to assist the AI in executing specific tasks or functions based on user commands. Args: - function (Optional[Function], optional): task that the tool performs. Defaults to None. - supplier (Optional[Supplier], optional): Preferred supplier to perform the task. Defaults to None.. Defaults to None. - model (Optional[Union[Text, Model]], optional): Model function. Defaults to None. + function (Optional[Union[Function, Text]]): task that the tool performs. Defaults to None. + supplier (Optional[Union[Dict, Supplier]]): Preferred supplier to perform the task. Defaults to None. Defaults to None. + model (Optional[Union[Text, Model]]): Model function. Defaults to None. """ assert ( function is not None or model is not None diff --git a/aixplain/modules/agent/tool/pipeline_tool.py b/aixplain/modules/agent/tool/pipeline_tool.py index 5ad2915a..fa8394ea 100644 --- a/aixplain/modules/agent/tool/pipeline_tool.py +++ b/aixplain/modules/agent/tool/pipeline_tool.py @@ -30,7 +30,7 @@ class PipelineTool(Tool): """Specialized software or resource designed to assist the AI in executing specific tasks or functions based on user commands. Attributes: - description (Text): descriptiion of the tool + description (Text): description of the tool pipeline (Union[Text, Pipeline]): pipeline """ diff --git a/aixplain/modules/team_agent/__init__.py b/aixplain/modules/team_agent/__init__.py new file mode 100644 index 00000000..420fc23a --- /dev/null +++ b/aixplain/modules/team_agent/__init__.py @@ -0,0 +1,246 @@ +__author__ = "aiXplain" + +""" +Copyright 2024 The aiXplain SDK authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+
+Author: Lucas Pavanelli and Thiago Castro Ferreira
+Date: August 15th 2024
+Description:
+    Team Agent Class
+"""
+
+import json
+import logging
+import time
+import traceback
+
+from aixplain.utils.file_utils import _request_with_retry
+from aixplain.enums.supplier import Supplier
+from aixplain.enums.asset_status import AssetStatus
+from aixplain.enums.storage_type import StorageType
+from aixplain.modules.model import Model
+from aixplain.modules.agent import Agent
+from typing import Dict, List, Text, Optional, Union
+from urllib.parse import urljoin
+
+from aixplain.utils import config
+
+
+class TeamAgent(Model):
+    """Advanced AI system capable of using multiple agents to perform a variety of tasks.
+
+    Attributes:
+        id (Text): ID of the Team Agent
+        name (Text): Name of the Team Agent
+        agents (List[Agent]): List of Agents that the Team Agent uses.
+        description (Text, optional): description of the Team Agent. Defaults to "".
+        llm_id (Text, optional): large language model. Defaults to GPT-4o (6646261c6eb563165658bbb1).
+        supplier (Text): Supplier of the Team Agent.
+        version (Text): Version of the Team Agent.
+        backend_url (str): URL of the backend.
+        api_key (str): The TEAM API key used for authentication.
+        cost (Dict, optional): model price. Defaults to None.
+        use_mentalist_and_inspector (bool): Use Mentalist and Inspector tools. Defaults to True.
+    """
+
+    def __init__(
+        self,
+        id: Text,
+        name: Text,
+        agents: List[Agent] = [],
+        description: Text = "",
+        llm_id: Text = "6646261c6eb563165658bbb1",
+        api_key: Optional[Text] = config.TEAM_API_KEY,
+        supplier: Union[Dict, Text, Supplier, int] = "aiXplain",
+        version: Optional[Text] = None,
+        cost: Optional[Dict] = None,
+        use_mentalist_and_inspector: bool = True,
+        status: AssetStatus = AssetStatus.ONBOARDING,
+        **additional_info,
+    ) -> None:
+        """Create a TeamAgent with the necessary information.
+
+        Args:
+            id (Text): ID of the Team Agent
+            name (Text): Name of the Team Agent
+            agents (List[Agent]): List of agents that the Team Agent uses.
+            description (Text, optional): description of the Team Agent. Defaults to "".
+            llm_id (Text, optional): large language model. Defaults to GPT-4o (6646261c6eb563165658bbb1).
+            supplier (Text): Supplier of the Team Agent.
+            version (Text): Version of the Team Agent.
+            backend_url (str): URL of the backend.
+            api_key (str): The TEAM API key used for authentication.
+            cost (Dict, optional): model price. Defaults to None.
+            use_mentalist_and_inspector (bool): Use Mentalist and Inspector tools. Defaults to True.
+        """
+        super().__init__(id, name, description, api_key, supplier, version, cost=cost)
+        self.additional_info = additional_info
+        self.agents = agents
+        self.llm_id = llm_id
+        self.use_mentalist_and_inspector = use_mentalist_and_inspector
+        if isinstance(status, str):
+            try:
+                status = AssetStatus(status)
+            except Exception:
+                status = AssetStatus.ONBOARDING
+        self.status = status
+
+    def run(
+        self,
+        data: Optional[Union[Dict, Text]] = None,
+        query: Optional[Text] = None,
+        session_id: Optional[Text] = None,
+        history: Optional[List[Dict]] = None,
+        name: Text = "model_process",
+        timeout: float = 300,
+        parameters: Dict = {},
+        wait_time: float = 0.5,
+        content: Optional[Union[Dict[Text, Text], List[Text]]] = None,
+    ) -> Dict:
+        """Runs a team agent call.
+
+        Args:
+            data (Optional[Union[Dict, Text]], optional): data to be processed by the team agent. Defaults to None.
+            query (Optional[Text], optional): query to be processed by the team agent. Defaults to None.
+ session_id (Optional[Text], optional): conversation Session ID. Defaults to None. + history (Optional[List[Dict]], optional): chat history (in case session ID is None). Defaults to None. + name (Text, optional): ID given to a call. Defaults to "model_process". + timeout (float, optional): total polling time. Defaults to 300. + parameters (Dict, optional): optional parameters to the model. Defaults to "{}". + wait_time (float, optional): wait time in seconds between polling calls. Defaults to 0.5. + content (Union[Dict[Text, Text], List[Text]], optional): Content inputs to be processed according to the query. Defaults to None. + + Returns: + Dict: parsed output from model + """ + start = time.time() + try: + response = self.run_async( + data=data, + query=query, + session_id=session_id, + history=history, + name=name, + parameters=parameters, + content=content, + ) + if response["status"] == "FAILED": + end = time.time() + response["elapsed_time"] = end - start + return response + poll_url = response["url"] + end = time.time() + response = self.sync_poll(poll_url, name=name, timeout=timeout, wait_time=wait_time) + return response + except Exception as e: + msg = f"Error in request for {name} - {traceback.format_exc()}" + logging.error(f"Team Agent Run: Error in running for {name}: {e}") + end = time.time() + return {"status": "FAILED", "error": msg, "elapsed_time": end - start} + + def run_async( + self, + data: Optional[Union[Dict, Text]] = None, + query: Optional[Text] = None, + session_id: Optional[Text] = None, + history: Optional[List[Dict]] = None, + name: Text = "model_process", + parameters: Dict = {}, + content: Optional[Union[Dict[Text, Text], List[Text]]] = None, + ) -> Dict: + """Runs asynchronously a Team Agent call. + + Args: + data (Optional[Union[Dict, Text]], optional): data to be processed by the Team Agent. Defaults to None. + query (Optional[Text], optional): query to be processed by the Team Agent. Defaults to None. + session_id (Optional[Text], optional): conversation Session ID. Defaults to None. + history (Optional[List[Dict]], optional): chat history (in case session ID is None). Defaults to None. + name (Text, optional): ID given to a call. Defaults to "model_process". + parameters (Dict, optional): optional parameters to the model. Defaults to "{}". + content (Union[Dict[Text, Text], List[Text]], optional): Content inputs to be processed according to the query. Defaults to None. + + Returns: + dict: polling URL in response + """ + from aixplain.factories.file_factory import FileFactory + + assert data is not None or query is not None, "Either 'data' or 'query' must be provided." + if data is not None: + if isinstance(data, dict): + assert "query" in data and data["query"] is not None, "When providing a dictionary, 'query' must be provided." + query = data.get("query") + if session_id is None: + session_id = data.get("session_id") + if history is None: + history = data.get("history") + if content is None: + content = data.get("content") + else: + query = data + + # process content inputs + if content is not None: + assert FileFactory.check_storage_type(query) == StorageType.TEXT, "When providing 'content', query must be text." + + if isinstance(content, list): + assert len(content) <= 3, "The maximum number of content inputs is 3." 
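+                # Each content input is normalized into a hosted link and appended
+                # to the query text, so the service receives the query plus URLs.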
+                for input_link in content:
+                    input_link = FileFactory.to_link(input_link)
+                    query += f"\n{input_link}"
+            elif isinstance(content, dict):
+                for key, value in content.items():
+                    assert "{{" + key + "}}" in query, f"Key '{key}' not found in query."
+                    value = FileFactory.to_link(value)
+                    query = query.replace("{{" + key + "}}", f"'{value}'")
+
+        headers = {"x-api-key": self.api_key, "Content-Type": "application/json"}
+
+        payload = {"id": self.id, "query": FileFactory.to_link(query), "sessionId": session_id, "history": history}
+        payload.update(parameters)
+        payload = json.dumps(payload)
+
+        r = _request_with_retry("post", self.url, headers=headers, data=payload)
+        logging.info(f"Team Agent Run Async: Start service for {name} - {self.url} - {payload} - {headers}")
+
+        resp = None
+        try:
+            resp = r.json()
+            logging.info(f"Result of request for {name} - {r.status_code} - {resp}")
+
+            poll_url = resp["data"]
+            response = {"status": "IN_PROGRESS", "url": poll_url}
+        except Exception:
+            response = {"status": "FAILED"}
+            msg = f"Error in request for {name} - {traceback.format_exc()}"
+            logging.error(f"Team Agent Run Async: Error in running for {name}: {resp}")
+            if resp is not None:
+                response["error"] = msg
+        return response
+
+    def delete(self) -> None:
+        """Delete Team Agent service"""
+        try:
+            url = urljoin(config.BACKEND_URL, f"sdk/agent-communities/{self.id}")
+            headers = {"x-api-key": config.TEAM_API_KEY, "Content-Type": "application/json"}
+            logging.debug(f"Start service for DELETE Team Agent - {url} - {headers}")
+            r = _request_with_retry("delete", url, headers=headers)
+            if r.status_code != 200:
+                raise Exception()
+        except Exception:
+            message = (
+                f"Team Agent Deletion Error (HTTP {r.status_code}): Make sure the Team Agent exists and you are the owner."
+            )
+            logging.error(message)
+            raise Exception(f"{message}")
diff --git a/aixplain/processes/data_onboarding/onboard_functions.py b/aixplain/processes/data_onboarding/onboard_functions.py
index 35a64e12..01a3fe9b 100644
--- a/aixplain/processes/data_onboarding/onboard_functions.py
+++ b/aixplain/processes/data_onboarding/onboard_functions.py
@@ -325,9 +325,9 @@ def create_data_asset(payload: Dict, data_asset_type: Text = "corpus", api_key:
             response = r.json()
             msg = response["message"]
             error_msg = f"Data Asset Onboarding Error: {msg}"
-    except Exception as e:
+    except Exception:
         error_msg = (
-            f"Data Asset Onboarding Error: Failure on creating the {data_asset_type}. Please contant the administrators."
+            f"Data Asset Onboarding Error: Failure on creating the {data_asset_type}. Please contact the administrators."
         )
         return {"success": False, "error": error_msg}
@@ -352,7 +352,7 @@ def is_data(data_id: Text) -> bool:
         if "id" in resp:
             return True
         return False
-    except:
+    except Exception:
         return False
@@ -379,13 +379,13 @@ def split_data(paths: List, split_rate: List[float], split_labels: List[Text]) -
                 if column_name is not None:
                     break
-    except Exception as e:
+    except Exception:
         message = f'Data Asset Onboarding Error: Local file "{path}" not found.'
         logging.exception(message)
         raise Exception(message)
 
     if column_name is None:
-        message = f"Data Asset Onboarding Error: All split names are used."
+        message = "Data Asset Onboarding Error: All split names are used."
raise Exception(message) for path in paths: diff --git a/tests/functional/agent/agent_functional_test.py b/tests/functional/agent/agent_functional_test.py index 491977e8..0d433899 100644 --- a/tests/functional/agent/agent_functional_test.py +++ b/tests/functional/agent/agent_functional_test.py @@ -56,7 +56,9 @@ def test_end2end(run_input_map): for tool in run_input_map["pipeline_tools"]: tools.append(AgentFactory.create_pipeline_tool(pipeline=tool["pipeline_id"], description=tool["description"])) print(f"Creating agent with tools: {tools}") - agent = AgentFactory.create(name=run_input_map["agent_name"], llm_id=run_input_map["llm_id"], tools=tools) + agent = AgentFactory.create( + name=run_input_map["agent_name"], description=run_input_map["agent_name"], llm_id=run_input_map["llm_id"], tools=tools + ) print(f"Agent created: {agent.__dict__}") print("Running agent") response = agent.run(data=run_input_map["query"]) @@ -81,6 +83,9 @@ def test_list_agents(): def test_fail_non_existent_llm(): with pytest.raises(Exception) as exc_info: AgentFactory.create( - name="Test Agent", llm_id="non_existent_llm", tools=[AgentFactory.create_model_tool(function=Function.TRANSLATION)] + name="Test Agent", + description="Test description", + llm_id="non_existent_llm", + tools=[AgentFactory.create_model_tool(function=Function.TRANSLATION)], ) assert str(exc_info.value) == "Large Language Model with ID 'non_existent_llm' not found." diff --git a/tests/functional/agent/data/agent_test_end2end.json b/tests/functional/agent/data/agent_test_end2end.json index 94bfc94b..595d22a9 100644 --- a/tests/functional/agent/data/agent_test_end2end.json +++ b/tests/functional/agent/data/agent_test_end2end.json @@ -1,6 +1,6 @@ [ { - "agent_name": "[TEST] Translation agent", + "agent_name": "TEST Translation agent", "llm_id": "6626a3a8c8f1d089790cf5a2", "llm_name": "Groq Llama 3 70B", "query": "Who is the president of Brazil right now? Translate to pt", diff --git a/tests/functional/team_agent/data/team_agent_test_end2end.json b/tests/functional/team_agent/data/team_agent_test_end2end.json new file mode 100644 index 00000000..ed6437d2 --- /dev/null +++ b/tests/functional/team_agent/data/team_agent_test_end2end.json @@ -0,0 +1,32 @@ +[ + { + "team_agent_name": "TEST Multi agent", + "llm_id": "6626a3a8c8f1d089790cf5a2", + "llm_name": "Groq Llama 3 70B", + "query": "Who is the president of Brazil right now? Translate to pt and synthesize in audio", + "agents": [ + { + "agent_name": "TEST Translation agent", + "llm_id": "6626a3a8c8f1d089790cf5a2", + "llm_name": "Groq Llama 3 70B", + "model_tools": [ + { + "function": "translation", + "supplier": "AWS" + } + ] + }, + { + "agent_name": "TEST Speech Synthesis agent", + "llm_id": "6626a3a8c8f1d089790cf5a2", + "llm_name": "Groq Llama 3 70B", + "model_tools": [ + { + "function": "speech-synthesis", + "supplier": "Google" + } + ] + } + ] + } +] diff --git a/tests/functional/team_agent/team_agent_functional_test.py b/tests/functional/team_agent/team_agent_functional_test.py new file mode 100644 index 00000000..46adfcbc --- /dev/null +++ b/tests/functional/team_agent/team_agent_functional_test.py @@ -0,0 +1,94 @@ +__author__ = "lucaspavanelli" + +""" +Copyright 2022 The aiXplain SDK authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import json +from dotenv import load_dotenv + +load_dotenv() +from aixplain.factories import AgentFactory, TeamAgentFactory +from aixplain.enums.function import Function +from aixplain.enums.supplier import Supplier + +import pytest + +RUN_FILE = "tests/functional/team_agent/data/team_agent_test_end2end.json" + + +def read_data(data_path): + return json.load(open(data_path, "r")) + + +@pytest.fixture(scope="module", params=read_data(RUN_FILE)) +def run_input_map(request): + return request.param + + +def test_end2end(run_input_map): + for agent in AgentFactory.list()["results"]: + agent.delete() + + agents = [] + for agent in run_input_map["agents"]: + tools = [] + if "model_tools" in agent: + for tool in agent["model_tools"]: + for supplier in Supplier: + if tool["supplier"] is not None and tool["supplier"].lower() in [ + supplier.value["code"].lower(), + supplier.value["name"].lower(), + ]: + tool["supplier"] = supplier + break + tools.append(AgentFactory.create_model_tool(**tool)) + if "pipeline_tools" in agent: + for tool in agent["pipeline_tools"]: + tools.append(AgentFactory.create_pipeline_tool(pipeline=tool["pipeline_id"], description=tool["description"])) + print(f"Creating agent with tools: {tools}") + agent = AgentFactory.create( + name=agent["agent_name"], description=agent["agent_name"], llm_id=agent["llm_id"], tools=tools + ) + agents.append(agent) + + team_agent = TeamAgentFactory.create( + name=run_input_map["team_agent_name"], + agents=agents, + description=run_input_map["team_agent_name"], + llm_id=run_input_map["llm_id"], + use_mentalist_and_inspector=True, + ) + print("Running team agent") + response = team_agent.run(data=run_input_map["query"]) + print(f"Team Agent response: {response}") + assert response is not None + assert response["completed"] is True + assert response["status"].lower() == "success" + assert "data" in response + assert response["data"]["session_id"] is not None + assert response["data"]["output"] is not None + print("Deleting team agent") + team_agent.delete() + + +def test_fail_non_existent_llm(): + with pytest.raises(Exception) as exc_info: + AgentFactory.create( + name="Test Agent", + description="", + llm_id="non_existent_llm", + tools=[AgentFactory.create_model_tool(function=Function.TRANSLATION)], + ) + assert str(exc_info.value) == "Large Language Model with ID 'non_existent_llm' not found." diff --git a/tests/unit/agent_test.py b/tests/unit/agent_test.py index 9606a3e5..61ac1864 100644 --- a/tests/unit/agent_test.py +++ b/tests/unit/agent_test.py @@ -8,21 +8,21 @@ def test_fail_no_data_query(): - agent = Agent("123", "Test Agent") + agent = Agent("123", "Test Agent", "Sample Description") with pytest.raises(Exception) as exc_info: agent.run_async() assert str(exc_info.value) == "Either 'data' or 'query' must be provided." def test_fail_query_must_be_provided(): - agent = Agent("123", "Test Agent") + agent = Agent("123", "Test Agent", "Sample Description") with pytest.raises(Exception) as exc_info: agent.run_async(data={}) assert str(exc_info.value) == "When providing a dictionary, 'query' must be provided." 
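+# NOTE: Agent() now takes the agent description as its third positional
+# argument, so every constructor call in these tests passes one.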
def test_fail_query_as_text_when_content_not_empty(): - agent = Agent("123", "Test Agent") + agent = Agent("123", "Test Agent", "Sample Description") with pytest.raises(Exception) as exc_info: agent.run_async( data={"query": "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav"}, @@ -32,7 +32,7 @@ def test_fail_query_as_text_when_content_not_empty(): def test_fail_content_exceed_maximum(): - agent = Agent("123", "Test Agent") + agent = Agent("123", "Test Agent", "Sample Description") with pytest.raises(Exception) as exc_info: agent.run_async( data={"query": "Transcribe the audios:"}, @@ -47,14 +47,14 @@ def test_fail_content_exceed_maximum(): def test_fail_key_not_found(): - agent = Agent("123", "Test Agent") + agent = Agent("123", "Test Agent", "Sample Description") with pytest.raises(Exception) as exc_info: agent.run_async(data={"query": "Translate the text: {{input1}}"}, content={"input2": "Hello, how are you?"}) assert str(exc_info.value) == "Key 'input2' not found in query." def test_sucess_query_content(): - agent = Agent("123", "Test Agent") + agent = Agent("123", "Test Agent", "Sample Description") with requests_mock.Mocker() as mock: url = agent.url headers = {"x-api-key": config.TEAM_API_KEY, "Content-Type": "application/json"} diff --git a/tests/unit/team_agent_test.py b/tests/unit/team_agent_test.py new file mode 100644 index 00000000..fd738c04 --- /dev/null +++ b/tests/unit/team_agent_test.py @@ -0,0 +1,73 @@ +import pytest +import requests_mock +from aixplain.modules import TeamAgent +from aixplain.factories import TeamAgentFactory +from aixplain.utils import config + + +def test_fail_no_data_query(): + team_agent = TeamAgent("123", "Test Team Agent") + with pytest.raises(Exception) as exc_info: + team_agent.run_async() + assert str(exc_info.value) == "Either 'data' or 'query' must be provided." + + +def test_fail_query_must_be_provided(): + team_agent = TeamAgent("123", "Test Team Agent") + with pytest.raises(Exception) as exc_info: + team_agent.run_async(data={}) + assert str(exc_info.value) == "When providing a dictionary, 'query' must be provided." + + +def test_fail_query_as_text_when_content_not_empty(): + team_agent = TeamAgent("123", "Test Team Agent") + with pytest.raises(Exception) as exc_info: + team_agent.run_async( + data={"query": "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav"}, + content=["https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav"], + ) + assert str(exc_info.value) == "When providing 'content', query must be text." + + +def test_fail_content_exceed_maximum(): + team_agent = TeamAgent("123", "Test Team Agent") + with pytest.raises(Exception) as exc_info: + team_agent.run_async( + data={"query": "Transcribe the audios:"}, + content=[ + "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", + "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", + "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", + "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", + ], + ) + assert str(exc_info.value) == "The maximum number of content inputs is 3." + + +def test_fail_key_not_found(): + team_agent = TeamAgent("123", "Test Team Agent") + with pytest.raises(Exception) as exc_info: + team_agent.run_async(data={"query": "Translate the text: {{input1}}"}, content={"input2": "Hello, how are you?"}) + assert str(exc_info.value) == "Key 'input2' not found in query." 
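+# The success-path test below mocks the team agent run endpoint with
+# requests_mock, exercising {{input1}} placeholder substitution and checking
+# that run_async returns the mocked poll URL.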
+ + +def test_sucess_query_content(): + team_agent = TeamAgent("123", "Test Team Agent") + with requests_mock.Mocker() as mock: + url = team_agent.url + headers = {"x-api-key": config.TEAM_API_KEY, "Content-Type": "application/json"} + ref_response = {"data": "Hello, how are you?", "status": "IN_PROGRESS"} + mock.post(url, headers=headers, json=ref_response) + + response = team_agent.run_async( + data={"query": "Translate the text: {{input1}}"}, content={"input1": "Hello, how are you?"} + ) + assert response["status"] == ref_response["status"] + assert response["url"] == ref_response["data"] + + +def test_fail_number_agents(): + with pytest.raises(Exception) as exc_info: + TeamAgentFactory.create(name="Test Team Agent", agents=[]) + + assert str(exc_info.value) == "TeamAgent Onboarding Error: At least one agent must be provided." From b93a706d900b6c26d76e4f6727fee6c23cd58f58 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Fri, 20 Sep 2024 10:45:48 -0300 Subject: [PATCH 038/105] Add fileMetadata information in script node (#251) --- aixplain/factories/pipeline_factory/utils.py | 2 +- aixplain/factories/script_factory.py | 9 ++-- aixplain/modules/pipeline/designer/nodes.py | 50 ++++++-------------- 3 files changed, 18 insertions(+), 43 deletions(-) diff --git a/aixplain/factories/pipeline_factory/utils.py b/aixplain/factories/pipeline_factory/utils.py index aba93ef9..9584863f 100644 --- a/aixplain/factories/pipeline_factory/utils.py +++ b/aixplain/factories/pipeline_factory/utils.py @@ -56,7 +56,7 @@ def build_from_response(response: Dict, load_architecture: bool = False) -> Pipe elif node_json["type"].lower() == "router": node = Router(routes=[Route(**route) for route in node_json["routes"]]) elif node_json["type"].lower() == "script": - node = Script(fileId=node_json["fileId"]) + node = Script(fileId=node_json["fileId"], fileMetadata=node_json["fileMetadata"]) elif node_json["type"].lower() == "output": node = Output() diff --git a/aixplain/factories/script_factory.py b/aixplain/factories/script_factory.py index 35789561..14835752 100644 --- a/aixplain/factories/script_factory.py +++ b/aixplain/factories/script_factory.py @@ -8,15 +8,12 @@ class ScriptFactory: - @classmethod def upload_script(cls, script_path: str) -> Tuple[str, str]: try: url = f"{config.BACKEND_URL}/sdk/pipelines/script" headers = {"Authorization": f"Token {config.TEAM_API_KEY}"} - r = requests.post( - url, headers=headers, files={"file": open(script_path, "rb")} - ) + r = requests.post(url, headers=headers, files={"file": open(script_path, "rb")}) if 200 <= r.status_code < 300: response = r.json() else: @@ -26,6 +23,6 @@ def upload_script(cls, script_path: str) -> Tuple[str, str]: # get metadata info fname = os.path.splitext(os.path.basename(script_path))[0] - file_size_kb = int(os.path.getsize(script_path) / 1024) - metadata = json.dumps({"name": fname, "size": file_size_kb}) + file_size = int(os.path.getsize(script_path)) + metadata = json.dumps({"name": fname, "size": file_size}) return response["fileId"], metadata diff --git a/aixplain/modules/pipeline/designer/nodes.py b/aixplain/modules/pipeline/designer/nodes.py index eb8fecc5..7adcc3ec 100644 --- a/aixplain/modules/pipeline/designer/nodes.py +++ b/aixplain/modules/pipeline/designer/nodes.py @@ -55,7 +55,7 @@ def __init__( supplier: str = None, version: str = None, pipeline: "DesignerPipeline" = None, - **kwargs + **kwargs, ): super().__init__(pipeline=pipeline, **kwargs) self.asset_id = asset_id @@ 
-85,9 +85,7 @@ def populate_asset(self): if self.function: if self.asset.function.value != self.function: - raise ValueError( - f"Function {self.function} is not supported by asset {self.asset_id}" # noqa - ) + raise ValueError(f"Function {self.function} is not supported by asset {self.asset_id}") # noqa else: self.function = self.asset.function.value self._auto_populate_params() @@ -176,7 +174,7 @@ def __init__( data: Optional[str] = None, data_types: Optional[List[DataType]] = None, pipeline: "DesignerPipeline" = None, - **kwargs + **kwargs, ): from aixplain.factories.file_factory import FileFactory @@ -219,12 +217,7 @@ class Output(Node[OutputInputs, OutputOutputs]): inputs_class: Type[TI] = OutputInputs outputs_class: Type[TO] = OutputOutputs - def __init__( - self, - data_types: Optional[List[DataType]] = None, - pipeline: "DesignerPipeline" = None, - **kwargs - ): + def __init__(self, data_types: Optional[List[DataType]] = None, pipeline: "DesignerPipeline" = None, **kwargs): super().__init__(pipeline=pipeline, **kwargs) self.data_types = data_types or [] @@ -252,7 +245,8 @@ def __init__( pipeline: "DesignerPipeline" = None, script_path: Optional[str] = None, fileId: Optional[str] = None, - **kwargs + fileMetadata: Optional[str] = None, + **kwargs, ): from aixplain.factories.script_factory import ScriptFactory @@ -261,13 +255,15 @@ def __init__( assert script_path or fileId, "script_path or fileId is required" if not fileId: - self.fileId = ScriptFactory.upload_script(script_path) + self.fileId, self.fileMetadata = ScriptFactory.upload_script(script_path) else: self.fileId = fileId + self.fileMetadata = fileMetadata def serialize(self) -> dict: obj = super().serialize() obj["fileId"] = self.fileId + obj["fileMetadata"] = self.fileMetadata return obj @@ -282,14 +278,7 @@ class Route(Serializable): operation: Operation type: RouteType - def __init__( - self, - value: DataType, - path: List[Union[Node, int]], - operation: Operation, - type: RouteType, - **kwargs - ): + def __init__(self, value: DataType, path: List[Union[Node, int]], operation: Operation, type: RouteType, **kwargs): """ Post init method to convert the nodes to node numbers if they are nodes. 
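
With the Script changes above, upload_script now returns both a file ID and a JSON metadata string (file name plus size in bytes), and the node stores and serializes the pair. A rough usage sketch; the fileId and metadata values below are invented for illustration:

    from aixplain.modules.pipeline.designer.nodes import Script

    # From a local file: fileId and fileMetadata are filled in by the upload.
    node = Script(script_path="tests/functional/pipelines/data/script.py")

    # From an already-uploaded script, both values are passed in explicitly.
    node = Script(fileId="some-file-id", fileMetadata='{"name": "script", "size": 2048}')

    assert "fileMetadata" in node.serialize()
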
@@ -303,10 +292,7 @@ def __init__( raise ValueError("Path is not valid, should be a list of nodes") # convert nodes to node numbers if they are nodes - self.path = [ - node.number if isinstance(node, Node) else node - for node in self.path - ] + self.path = [node.number if isinstance(node, Node) else node for node in self.path] def serialize(self) -> dict: return { @@ -344,10 +330,7 @@ class Router(Node[RouterInputs, RouterOutputs], LinkableMixin): inputs_class: Type[TI] = RouterInputs outputs_class: Type[TO] = RouterOutputs - def __init__( - self, routes: List[Route], pipeline: "DesignerPipeline" = None, - **kwargs - ): + def __init__(self, routes: List[Route], pipeline: "DesignerPipeline" = None, **kwargs): super().__init__(pipeline=pipeline, **kwargs) self.routes = routes @@ -386,10 +369,7 @@ class Decision(Node[DecisionInputs, DecisionOutputs], LinkableMixin): inputs_class: Type[TI] = DecisionInputs outputs_class: Type[TO] = DecisionOutputs - def __init__( - self, routes: List[Route], pipeline: "DesignerPipeline" = None, - **kwargs - ): + def __init__(self, routes: List[Route], pipeline: "DesignerPipeline" = None, **kwargs): super().__init__(pipeline=pipeline, **kwargs) self.routes = routes @@ -469,9 +449,7 @@ def __init__(self, node: Node): self.data = self.create_param("data") -class BareReconstructor( - BaseReconstructor[ReconstructorInputs, ReconstructorOutputs] -): +class BareReconstructor(BaseReconstructor[ReconstructorInputs, ReconstructorOutputs]): """ Reconstructor node class, this node will be used to reconstruct the output of the segmented lines of execution. From f7bd9837b90d3e101eeb53d91ce884e65e616cdd Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Mon, 23 Sep 2024 20:52:56 -0300 Subject: [PATCH 039/105] Name Validation of Agents and Team Agents (#253) --- aixplain/factories/agent_factory/__init__.py | 3 ++- aixplain/factories/agent_factory/utils.py | 8 ++++++++ aixplain/factories/team_agent_factory/__init__.py | 3 ++- tests/unit/agent_test.py | 6 ++++++ 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/aixplain/factories/agent_factory/__init__.py b/aixplain/factories/agent_factory/__init__.py index 440219dd..2a16e191 100644 --- a/aixplain/factories/agent_factory/__init__.py +++ b/aixplain/factories/agent_factory/__init__.py @@ -34,7 +34,7 @@ from aixplain.utils import config from typing import Dict, List, Optional, Text, Union -from aixplain.factories.agent_factory.utils import build_agent, validate_llm +from aixplain.factories.agent_factory.utils import build_agent, validate_llm, validate_name from aixplain.utils.file_utils import _request_with_retry from urllib.parse import urljoin @@ -65,6 +65,7 @@ def create( Returns: Agent: created Agent """ + validate_name(name) # validate LLM ID validate_llm(llm_id) diff --git a/aixplain/factories/agent_factory/utils.py b/aixplain/factories/agent_factory/utils.py index 4a48a3b9..d86982ef 100644 --- a/aixplain/factories/agent_factory/utils.py +++ b/aixplain/factories/agent_factory/utils.py @@ -59,3 +59,11 @@ def validate_llm(model_id: Text) -> None: assert llm.function == Function.TEXT_GENERATION, "Large Language Model must be a text generation model." except Exception: raise Exception(f"Large Language Model with ID '{model_id}' not found.") + + +def validate_name(name: Text) -> None: + import re + + assert ( + re.match("^[a-zA-Z0-9 ]*$", name) is not None + ), "Agent Creation Error: Agent name must not contain special characters." 
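
Because the pattern ^[a-zA-Z0-9 ]*$ is anchored but quantified with *, it rejects special characters yet still matches the empty string, so the new guard behaves as follows:

    from aixplain.factories.agent_factory.utils import validate_name

    validate_name("Test Agent 1")  # ok: letters, digits and spaces only
    validate_name("[Test]")        # AssertionError: name must not contain special characters
    validate_name("")              # also passes; emptiness is not checked here
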
diff --git a/aixplain/factories/team_agent_factory/__init__.py b/aixplain/factories/team_agent_factory/__init__.py index 9baf3cf0..72d47c03 100644 --- a/aixplain/factories/team_agent_factory/__init__.py +++ b/aixplain/factories/team_agent_factory/__init__.py @@ -26,7 +26,7 @@ from aixplain.enums.supplier import Supplier from aixplain.factories.agent_factory import AgentFactory -from aixplain.factories.agent_factory.utils import validate_llm +from aixplain.factories.agent_factory.utils import validate_llm, validate_name from aixplain.modules.agent import Agent from aixplain.modules.team_agent import TeamAgent from aixplain.utils import config @@ -50,6 +50,7 @@ def create( use_mentalist_and_inspector: bool = True, ) -> TeamAgent: """Create a new team agent in the platform.""" + validate_name(name) # validate LLM ID validate_llm(llm_id) assert len(agents) > 0, "TeamAgent Onboarding Error: At least one agent must be provided." diff --git a/tests/unit/agent_test.py b/tests/unit/agent_test.py index 61ac1864..1be0682e 100644 --- a/tests/unit/agent_test.py +++ b/tests/unit/agent_test.py @@ -83,6 +83,12 @@ def test_invalid_modeltool(): assert str(exc_info.value) == "Model Tool Unavailable. Make sure Model '309851793' exists or you have access to it." +def test_invalid_agent_name(): + with pytest.raises(Exception) as exc_info: + AgentFactory.create(name="[Test]", description="", tools=[], llm_id="6646261c6eb563165658bbb1") + assert str(exc_info.value) == "Agent Creation Error: Agent name must not contain special characters." + + def test_create_agent(): from aixplain.enums import Supplier From b886287193706f7b8e1df54d564eacb81311bfac Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Thu, 26 Sep 2024 12:56:53 -0300 Subject: [PATCH 040/105] Fixes in pipeline design and reconstructor node (#255) * Fixes in pipeline design and reconstructor node * data -> text --- .../factories/pipeline_factory/__init__.py | 2 +- aixplain/modules/pipeline/asset.py | 4 +- aixplain/modules/pipeline/designer/enums.py | 11 ++-- aixplain/modules/pipeline/designer/nodes.py | 8 +-- tests/functional/pipelines/designer_test.py | 55 ++++++------------- 5 files changed, 28 insertions(+), 52 deletions(-) diff --git a/aixplain/factories/pipeline_factory/__init__.py b/aixplain/factories/pipeline_factory/__init__.py index 051c63fb..cb4336fe 100644 --- a/aixplain/factories/pipeline_factory/__init__.py +++ b/aixplain/factories/pipeline_factory/__init__.py @@ -290,7 +290,7 @@ def create( pipeline = json.load(f) for i, node in enumerate(pipeline["nodes"]): - if "functionType" in node and node["functionType"] == "AI": + if "functionType" in node: pipeline["nodes"][i]["functionType"] = pipeline["nodes"][i]["functionType"].lower() # prepare payload payload = { diff --git a/aixplain/modules/pipeline/asset.py b/aixplain/modules/pipeline/asset.py index 6933f601..0e9ed56e 100644 --- a/aixplain/modules/pipeline/asset.py +++ b/aixplain/modules/pipeline/asset.py @@ -380,7 +380,7 @@ def update( pipeline = json.load(f) for i, node in enumerate(pipeline["nodes"]): - if "functionType" in node and node["functionType"] == "AI": + if "functionType" in node: pipeline["nodes"][i]["functionType"] = pipeline["nodes"][i]["functionType"].lower() # prepare payload status = "draft" @@ -437,7 +437,7 @@ def save(self, save_as_asset: bool = False, api_key: Optional[Text] = None): pipeline = self.to_dict() for i, node in enumerate(pipeline["nodes"]): - if "functionType" in node and node["functionType"] == 
"AI": + if "functionType" in node: pipeline["nodes"][i]["functionType"] = pipeline["nodes"][i]["functionType"].lower() # prepare payload status = "draft" diff --git a/aixplain/modules/pipeline/designer/enums.py b/aixplain/modules/pipeline/designer/enums.py index a3a07a40..fe4cbfed 100644 --- a/aixplain/modules/pipeline/designer/enums.py +++ b/aixplain/modules/pipeline/designer/enums.py @@ -22,8 +22,6 @@ class NodeType(str, Enum): INPUT = "INPUT" OUTPUT = "OUTPUT" SCRIPT = "SCRIPT" - SEGMENTOR = "SEGMENT" - RECONSTRUCTOR = "RECONSTRUCT" ROUTER = "ROUTER" DECISION = "DECISION" @@ -33,10 +31,11 @@ class AssetType(str, Enum): class FunctionType(str, Enum): - AI = "AI" - SEGMENTOR = "SEGMENTOR" - RECONSTRUCTOR = "RECONSTRUCTOR" - METRIC = "METRIC" + AI = "ai" + SEGMENTOR = "segmentor" + RECONSTRUCTOR = "reconstructor" + UTILITY = "utility" + METRIC = "metric" class ParamType: diff --git a/aixplain/modules/pipeline/designer/nodes.py b/aixplain/modules/pipeline/designer/nodes.py index 7adcc3ec..a6879e04 100644 --- a/aixplain/modules/pipeline/designer/nodes.py +++ b/aixplain/modules/pipeline/designer/nodes.py @@ -395,7 +395,7 @@ class BaseSegmentor(AssetNode[TI, TO]): into smaller fragments for much easier and efficient processing. """ - type: NodeType = NodeType.SEGMENTOR + type: NodeType = NodeType.ASSET functionType: FunctionType = FunctionType.SEGMENTOR @@ -417,7 +417,7 @@ class BareSegmentor(BaseSegmentor[SegmentorInputs, SegmentorOutputs]): into smaller fragments for much easier and efficient processing. """ - type: NodeType = NodeType.SEGMENTOR + type: NodeType = NodeType.ASSET functionType: FunctionType = FunctionType.SEGMENTOR inputs_class: Type[TI] = SegmentorInputs outputs_class: Type[TO] = SegmentorOutputs @@ -429,7 +429,7 @@ class BaseReconstructor(AssetNode[TI, TO]): output of the segmented lines of execution. """ - type: NodeType = NodeType.RECONSTRUCTOR + type: NodeType = NodeType.ASSET functionType: FunctionType = FunctionType.RECONSTRUCTOR @@ -455,7 +455,7 @@ class BareReconstructor(BaseReconstructor[ReconstructorInputs, ReconstructorOutp output of the segmented lines of execution. 
""" - type: NodeType = NodeType.RECONSTRUCTOR + type: NodeType = NodeType.ASSET functionType: FunctionType = FunctionType.RECONSTRUCTOR inputs_class: Type[TI] = ReconstructorInputs outputs_class: Type[TO] = ReconstructorOutputs diff --git a/tests/functional/pipelines/designer_test.py b/tests/functional/pipelines/designer_test.py index f2ae92d5..d8caaf35 100644 --- a/tests/functional/pipelines/designer_test.py +++ b/tests/functional/pipelines/designer_test.py @@ -110,9 +110,7 @@ def test_routing_pipeline(pipeline): translation = pipeline.asset(TRANSLATION_ASSET) speech_recognition = pipeline.asset(SPEECH_RECOGNITION_ASSET) - input.route( - translation.inputs.text, speech_recognition.inputs.source_audio - ) + input.route(translation.inputs.text, speech_recognition.inputs.source_audio) translation.use_output("data") speech_recognition.use_output("data") @@ -135,17 +133,11 @@ def test_scripting_pipeline(pipeline): input = pipeline.input() - segmentor = pipeline.speaker_diarization_audio( - asset_id=SPEAKER_DIARIZATION_AUDIO_ASSET - ) + segmentor = pipeline.speaker_diarization_audio(asset_id=SPEAKER_DIARIZATION_AUDIO_ASSET) - speech_recognition = pipeline.speech_recognition( - asset_id=SPEECH_RECOGNITION_ASSET - ) + speech_recognition = pipeline.speech_recognition(asset_id=SPEECH_RECOGNITION_ASSET) - script = pipeline.script( - script_path="tests/functional/pipelines/data/script.py" - ) + script = pipeline.script(script_path="tests/functional/pipelines/data/script.py") script.inputs.create_param(code="transcripts", data_type=DataType.TEXT) script.inputs.create_param(code="speakers", data_type=DataType.LABEL) script.outputs.create_param(code="data", data_type=DataType.TEXT) @@ -177,9 +169,7 @@ def test_decision_pipeline(pipeline): input = pipeline.input() - sentiment_analysis = pipeline.sentiment_analysis( - asset_id=SENTIMENT_ANALYSIS_ASSET - ) + sentiment_analysis = pipeline.sentiment_analysis(asset_id=SENTIMENT_ANALYSIS_ASSET) positive_output = pipeline.output() negative_output = pipeline.output() @@ -220,19 +210,15 @@ def test_decision_pipeline(pipeline): def test_reconstructing_pipeline(pipeline): input = pipeline.input() - segmentor = pipeline.speaker_diarization_audio( - asset_id="62fab6ecb39cca09ca5bc365" - ) + segmentor = pipeline.speaker_diarization_audio(asset_id="62fab6ecb39cca09ca5bc365") - speech_recognition = pipeline.speech_recognition( - asset_id="60ddefab8d38c51c5885ee38" - ) + speech_recognition = pipeline.speech_recognition(asset_id="60ddefab8d38c51c5885ee38") - reconstructor = pipeline.bare_reconstructor() + reconstructor = pipeline.text_reconstruction(asset_id="636cf7ab0f8ddf0db97929e4") input.outputs.input.link(segmentor.inputs.audio) segmentor.outputs.audio.link(speech_recognition.inputs.source_audio) - speech_recognition.outputs.data.link(reconstructor.inputs.data) + speech_recognition.outputs.data.link(reconstructor.inputs.text) reconstructor.use_output("data") @@ -259,35 +245,26 @@ def test_metric_pipeline(pipeline): reference_input_node = pipeline.input(label="ReferenceInput") # Instantiate the metric node - translation_metric_node = pipeline\ - .text_generation_metric(asset_id='639874ab506c987b1ae1acc6') + translation_metric_node = pipeline.text_generation_metric(asset_id="639874ab506c987b1ae1acc6") # Instantiate output node score_output_node = pipeline.output() # Link the nodes - text_input_node.link(translation_metric_node, - from_param='input', - to_param='hypotheses') + text_input_node.link(translation_metric_node, from_param="input", to_param="hypotheses") - 
reference_input_node.link(translation_metric_node, - from_param='input', - to_param='references') + reference_input_node.link(translation_metric_node, from_param="input", to_param="references") - translation_metric_node.link(score_output_node, - from_param='data', - to_param='output') + translation_metric_node.link(score_output_node, from_param="data", to_param="output") translation_metric_node.inputs.score_identifier = "bleu" # Save and run the pipeline pipeline.save() - output = pipeline.run(data={ - "TextInput": reference_id, "ReferenceInput": reference_id - }, data_asset={ - "TextInput": data_asset_id, "ReferenceInput": data_asset_id - } + output = pipeline.run( + data={"TextInput": reference_id, "ReferenceInput": reference_id}, + data_asset={"TextInput": data_asset_id, "ReferenceInput": data_asset_id}, ) assert output["status"] == "SUCCESS" From 8bd64600bad82aeb2ff68643831215530fb3b2a8 Mon Sep 17 00:00:00 2001 From: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Date: Sun, 29 Sep 2024 17:16:00 -0300 Subject: [PATCH 041/105] Add get method to agent and team agent tests (#259) * Add get method to agent and team agent tests * Remove print --- tests/functional/agent/agent_functional_test.py | 8 +++----- tests/functional/team_agent/team_agent_functional_test.py | 7 +++---- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/tests/functional/agent/agent_functional_test.py b/tests/functional/agent/agent_functional_test.py index 0d433899..1a654832 100644 --- a/tests/functional/agent/agent_functional_test.py +++ b/tests/functional/agent/agent_functional_test.py @@ -55,21 +55,19 @@ def test_end2end(run_input_map): if "pipeline_tools" in run_input_map: for tool in run_input_map["pipeline_tools"]: tools.append(AgentFactory.create_pipeline_tool(pipeline=tool["pipeline_id"], description=tool["description"])) - print(f"Creating agent with tools: {tools}") agent = AgentFactory.create( name=run_input_map["agent_name"], description=run_input_map["agent_name"], llm_id=run_input_map["llm_id"], tools=tools ) - print(f"Agent created: {agent.__dict__}") - print("Running agent") + assert agent is not None + agent = AgentFactory.get(agent.id) + assert agent is not None response = agent.run(data=run_input_map["query"]) - print(f"Agent response: {response}") assert response is not None assert response["completed"] is True assert response["status"].lower() == "success" assert "data" in response assert response["data"]["session_id"] is not None assert response["data"]["output"] is not None - print("Deleting agent") agent.delete() diff --git a/tests/functional/team_agent/team_agent_functional_test.py b/tests/functional/team_agent/team_agent_functional_test.py index 46adfcbc..520f7a1d 100644 --- a/tests/functional/team_agent/team_agent_functional_test.py +++ b/tests/functional/team_agent/team_agent_functional_test.py @@ -57,7 +57,6 @@ def test_end2end(run_input_map): if "pipeline_tools" in agent: for tool in agent["pipeline_tools"]: tools.append(AgentFactory.create_pipeline_tool(pipeline=tool["pipeline_id"], description=tool["description"])) - print(f"Creating agent with tools: {tools}") agent = AgentFactory.create( name=agent["agent_name"], description=agent["agent_name"], llm_id=agent["llm_id"], tools=tools ) @@ -70,16 +69,16 @@ def test_end2end(run_input_map): llm_id=run_input_map["llm_id"], use_mentalist_and_inspector=True, ) - print("Running team agent") + assert team_agent is not None + team_agent = TeamAgentFactory.get(team_agent.id) + assert team_agent is not None response = 
team_agent.run(data=run_input_map["query"]) - print(f"Team Agent response: {response}") assert response is not None assert response["completed"] is True assert response["status"].lower() == "success" assert "data" in response assert response["data"]["session_id"] is not None assert response["data"]["output"] is not None - print("Deleting team agent") team_agent.delete() From 4bd9bc02a4893a55c30b652ab750056cf6b6f65b Mon Sep 17 00:00:00 2001 From: Zaina Abu Shaban Date: Mon, 7 Oct 2024 17:06:31 +0300 Subject: [PATCH 042/105] initial API key factory (#261) * initial API key factory * API Key CRUD Service refactoring * Fixes for list, delete and get of api keys * Added unit test * Added unit test * Added functional tests * Added functional tests * Removing tem api key env --------- Co-authored-by: xainaz Co-authored-by: Thiago Castro Ferreira --- aixplain/factories/__init__.py | 1 + aixplain/factories/api_key_factory.py | 112 +++++++++++++++++++++ aixplain/modules/__init__.py | 1 + aixplain/modules/api_key.py | 134 ++++++++++++++++++++++++++ tests/functional/apikey/apikey.json | 21 ++++ tests/functional/apikey/test_api.py | 95 ++++++++++++++++++ tests/unit/api_key_test.py | 67 +++++++++++++ 7 files changed, 431 insertions(+) create mode 100644 aixplain/factories/api_key_factory.py create mode 100644 aixplain/modules/api_key.py create mode 100644 tests/functional/apikey/apikey.json create mode 100644 tests/functional/apikey/test_api.py create mode 100644 tests/unit/api_key_test.py diff --git a/aixplain/factories/__init__.py b/aixplain/factories/__init__.py index d540f374..104215a1 100644 --- a/aixplain/factories/__init__.py +++ b/aixplain/factories/__init__.py @@ -32,3 +32,4 @@ from .pipeline_factory import PipelineFactory from .finetune_factory import FinetuneFactory from .wallet_factory import WalletFactory +from .api_key_factory import APIKeyFactory diff --git a/aixplain/factories/api_key_factory.py b/aixplain/factories/api_key_factory.py new file mode 100644 index 00000000..750ce0b2 --- /dev/null +++ b/aixplain/factories/api_key_factory.py @@ -0,0 +1,112 @@ +import json +import logging +import aixplain.utils.config as config +from datetime import datetime +from typing import Text, List, Dict, Union +from aixplain.utils.file_utils import _request_with_retry +from aixplain.modules.api_key import APIKey, APIKeyGlobalLimits + + +class APIKeyFactory: + backend_url = config.BACKEND_URL + + @classmethod + def list(cls) -> List[APIKey]: + """List all API keys""" + resp = "Unspecified error" + try: + url = f"{cls.backend_url}/sdk/api-keys" + headers = {"Content-Type": "application/json", "Authorization": f"Token {config.TEAM_API_KEY}"} + logging.info(f"Start service for GET API List - {url} - {headers}") + r = _request_with_retry("GET", url, headers=headers) + resp = r.json() + except Exception: + raise Exception("API Key List Error: Failed to list API keys") + + if 200 <= r.status_code < 300: + api_keys = [ + APIKey( + id=key["id"], + name=key["name"], + budget=key["budget"] if "budget" in key else None, + global_limits=key["globalLimits"] if "globalLimits" in key else None, + asset_limits=key["assetLimits"] if "assetLimits" in key else [], + expires_at=key["expiresAt"] if "expiresAt" in key else None, + access_key=key["accessKey"], + is_admin=key["isAdmin"], + ) + for key in resp + ] + else: + raise Exception(f"API Key List Error: Failed to list API keys. 
Error: {str(resp)}") + return api_keys + + @classmethod + def create( + cls, + name: Text, + budget: int, + global_limits: Union[Dict, APIKeyGlobalLimits], + asset_limits: List[Union[Dict, APIKeyGlobalLimits]], + expires_at: datetime, + ) -> APIKey: + """Create a new API key""" + resp = "Unspecified error" + url = f"{cls.backend_url}/sdk/api-keys" + headers = {"Content-Type": "application/json", "Authorization": f"Token {config.TEAM_API_KEY}"} + + payload = APIKey( + name=name, budget=budget, global_limits=global_limits, asset_limits=asset_limits, expires_at=expires_at + ).to_dict() + + try: + logging.info(f"Start service for POST API Creation - {url} - {headers} - {json.dumps(payload)}") + r = _request_with_retry("post", url, json=payload, headers=headers) + resp = r.json() + except Exception as e: + raise Exception(f"API Key Creation Error: Failed to create a new API key. Error: {str(e)}") + + if 200 <= r.status_code < 300: + api_key = APIKey( + id=resp["id"], + name=resp["name"], + budget=resp["budget"] if "budget" in resp else None, + global_limits=resp["globalLimits"] if "globalLimits" in resp else None, + asset_limits=resp["assetLimits"] if "assetLimits" in resp else [], + expires_at=resp["expiresAt"] if "expiresAt" in resp else None, + access_key=resp["accessKey"], + is_admin=resp["isAdmin"], + ) + return api_key + else: + raise Exception(f"API Key Creation Error: Failed to create a new API key. Error: {str(resp)}") + + @classmethod + def update(cls, api_key: APIKey) -> APIKey: + """Update an existing API key""" + try: + url = f"{cls.backend_url}/sdk/api-keys/{api_key.id}" + headers = {"Content-Type": "application/json", "Authorization": f"Token {config.TEAM_API_KEY}"} + payload = api_key.to_dict() + + logging.info(f"Updating API key with ID {api_key.id} and new values") + r = _request_with_retry("put", url, json=payload, headers=headers) + resp = r.json() + except Exception as e: + raise Exception(f"API Key Update Error: Failed to update API key with ID {id}. Error: {str(e)}") + + resp = "Unspecified error" + if 200 <= r.status_code < 300: + api_key = APIKey( + id=resp["id"], + name=resp["name"], + budget=resp["budget"] if "budget" in resp else None, + global_limits=resp["globalLimits"] if "globalLimits" in resp else None, + asset_limits=resp["assetLimits"] if "assetLimits" in resp else [], + expires_at=resp["expiresAt"] if "expiresAt" in resp else None, + access_key=resp["accessKey"], + is_admin=resp["isAdmin"], + ) + return api_key + else: + raise Exception(f"API Key Update Error: Failed to update API key with ID {api_key.id}. 
Error: {str(resp)}") diff --git a/aixplain/modules/__init__.py b/aixplain/modules/__init__.py index bad0c225..6ac5ae9e 100644 --- a/aixplain/modules/__init__.py +++ b/aixplain/modules/__init__.py @@ -36,3 +36,4 @@ from .agent import Agent from .agent.tool import Tool from .team_agent import TeamAgent +from .api_key import APIKey, APIKeyGlobalLimits diff --git a/aixplain/modules/api_key.py b/aixplain/modules/api_key.py new file mode 100644 index 00000000..15b1bb68 --- /dev/null +++ b/aixplain/modules/api_key.py @@ -0,0 +1,134 @@ +from aixplain.modules import Model +from datetime import datetime +from typing import Dict, List, Optional, Text, Union + + +class APIKeyGlobalLimits: + def __init__( + self, + token_per_minute: int, + token_per_day: int, + request_per_minute: int, + request_per_day: int, + model: Optional[Union[Text, Model]] = None, + ): + self.token_per_minute = token_per_minute + self.token_per_day = token_per_day + self.request_per_minute = request_per_minute + self.request_per_day = request_per_day + self.model = model + if model is not None and isinstance(model, str): + from aixplain.factories import ModelFactory + + self.model = ModelFactory.get(model) + + +class APIKey: + def __init__( + self, + name: Text, + expires_at: Union[datetime, Text], + budget: Optional[float] = None, + asset_limits: List[APIKeyGlobalLimits] = [], + global_limits: Optional[Union[Dict, APIKeyGlobalLimits]] = None, + id: int = "", + access_key: Optional[Text] = None, + is_admin: bool = False, + ): + self.id = id + self.name = name + self.budget = budget + self.global_limits = global_limits + if global_limits is not None and isinstance(global_limits, dict): + self.global_limits = APIKeyGlobalLimits( + token_per_minute=global_limits["tpm"], + token_per_day=global_limits["tpd"], + request_per_minute=global_limits["rpm"], + request_per_day=global_limits["rpd"], + ) + self.asset_limits = asset_limits + for i, asset_limit in enumerate(self.asset_limits): + if isinstance(asset_limit, dict): + self.asset_limits[i] = APIKeyGlobalLimits( + token_per_minute=asset_limit["tpm"], + token_per_day=asset_limit["tpd"], + request_per_minute=asset_limit["rpm"], + request_per_day=asset_limit["rpd"], + model=asset_limit["model"], + ) + self.expires_at = expires_at + self.access_key = access_key + self.is_admin = is_admin + self.validate() + + def validate(self) -> None: + """Validate the APIKey object""" + from aixplain.factories import ModelFactory + + if self.budget is not None: + assert self.budget > 0, "Budget must be greater than 0" + if self.global_limits is not None: + assert self.global_limits.request_per_day > 0, "Request per day must be greater than 0" + assert self.global_limits.request_per_minute > 0, "Request per minute must be greater than 0" + assert self.global_limits.token_per_day > 0, "Token per day must be greater than 0" + assert self.global_limits.token_per_minute > 0, "Token per minute must be greater than 0" + for i, asset_limit in enumerate(self.asset_limits): + assert asset_limit.model is not None, f"Asset limit {i} must have a model." 
+ assert asset_limit.request_per_day > 0, f"Asset limit {i} request per day must be greater than 0" + assert asset_limit.request_per_minute > 0, f"Asset limit {i} request per minute must be greater than 0" + assert asset_limit.token_per_day > 0, f"Asset limit {i} token per day must be greater than 0" + assert asset_limit.token_per_minute > 0, f"Asset limit {i} token per minute must be greater than 0" + + if isinstance(asset_limit.model, str): + try: + self.asset_limits[i].model = ModelFactory.get(asset_limit.model) + except Exception: + raise Exception(f"Asset {asset_limit.model} is not a valid aiXplain model.") + + def to_dict(self) -> Dict: + """Convert the APIKey object to a dictionary""" + payload = { + "id": self.id, + "name": self.name, + "budget": self.budget, + "assetLimits": [], + "expiresAt": self.expires_at.strftime("%Y-%m-%dT%H:%M:%S.%fZ"), + } + + if self.global_limits is not None: + payload["globalLimits"] = { + "tpm": self.global_limits.token_per_minute, + "tpd": self.global_limits.token_per_day, + "rpm": self.global_limits.request_per_minute, + "rpd": self.global_limits.request_per_day, + } + + for i, asset_limit in enumerate(self.asset_limits): + payload["assetLimits"].append( + { + "tpm": asset_limit.token_per_minute, + "tpd": asset_limit.token_per_day, + "rpm": asset_limit.request_per_minute, + "rpd": asset_limit.request_per_day, + "model": asset_limit.model.id, + } + ) + return payload + + def delete(self) -> None: + """Delete an API key by its ID""" + import logging + from aixplain.utils import config + from aixplain.utils.file_utils import _request_with_retry + + try: + url = f"{config.BACKEND_URL}/sdk/api-keys/{self.id}" + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + logging.info(f"Start service for DELETE API Key - {url} - {headers}") + r = _request_with_retry("delete", url, headers=headers) + if r.status_code != 200: + raise Exception() + except Exception: + message = "API Key Deletion Error: Make sure the API Key exists and you are the owner." 
+ logging.error(message) + raise Exception(f"{message}") diff --git a/tests/functional/apikey/apikey.json b/tests/functional/apikey/apikey.json new file mode 100644 index 00000000..dfa9efe1 --- /dev/null +++ b/tests/functional/apikey/apikey.json @@ -0,0 +1,21 @@ +{ + "name": "Test API Key", + "asset_limits": [ + { + "model": "640b517694bf816d35a59125", + "token_per_minute": 100, + "token_per_day": 1000, + "request_per_day": 1000, + "request_per_minute": 100 + } + ], + "global_limits": { + "token_per_minute": 100, + "token_per_day": 1000, + "request_per_day": 1000, + "request_per_minute": 100 + }, + "budget": 1000, + "expires_at": "2024-12-12T00:00:00Z" + } + \ No newline at end of file diff --git a/tests/functional/apikey/test_api.py b/tests/functional/apikey/test_api.py new file mode 100644 index 00000000..31094a59 --- /dev/null +++ b/tests/functional/apikey/test_api.py @@ -0,0 +1,95 @@ +from aixplain.factories.api_key_factory import APIKeyFactory +from aixplain.modules import APIKey, APIKeyGlobalLimits +from datetime import datetime +import json +import pytest + + +def test_create_api_key_from_json(): + api_key_json = "tests/functional/apikey/apikey.json" + + with open(api_key_json, "r") as file: + api_key_data = json.load(file) + + expires_at = datetime.strptime(api_key_data["expires_at"], "%Y-%m-%dT%H:%M:%SZ") + + api_key = APIKeyFactory.create( + name=api_key_data["name"], + asset_limits=[ + APIKeyGlobalLimits( + model=api_key_data["asset_limits"][0]["model"], + token_per_minute=api_key_data["asset_limits"][0]["token_per_minute"], + token_per_day=api_key_data["asset_limits"][0]["token_per_day"], + request_per_day=api_key_data["asset_limits"][0]["request_per_day"], + request_per_minute=api_key_data["asset_limits"][0]["request_per_minute"], + ) + ], + global_limits=APIKeyGlobalLimits( + token_per_minute=api_key_data["global_limits"]["token_per_minute"], + token_per_day=api_key_data["global_limits"]["token_per_day"], + request_per_day=api_key_data["global_limits"]["request_per_day"], + request_per_minute=api_key_data["global_limits"]["request_per_minute"], + ), + budget=api_key_data["budget"], + expires_at=expires_at, + ) + + assert isinstance(api_key, APIKey) + assert api_key.id != "" + assert api_key.name == api_key_data["name"] + + api_key.delete() + + +def test_create_api_key_from_dict(): + api_key_dict = { + "asset_limits": [ + { + "model": "640b517694bf816d35a59125", + "token_per_minute": 100, + "token_per_day": 1000, + "request_per_day": 1000, + "request_per_minute": 100, + } + ], + "global_limits": {"token_per_minute": 100, "token_per_day": 1000, "request_per_day": 1000, "request_per_minute": 100}, + "budget": 1000, + "expires_at": "2024-12-12T00:00:00Z", + } + + api_key_name = "Test API Key" + api_key = APIKeyFactory.create( + name=api_key_name, + asset_limits=[APIKeyGlobalLimits(**limit) for limit in api_key_dict["asset_limits"]], + global_limits=APIKeyGlobalLimits(**api_key_dict["global_limits"]), + budget=api_key_dict["budget"], + expires_at=datetime.strptime(api_key_dict["expires_at"], "%Y-%m-%dT%H:%M:%SZ"), + ) + + assert isinstance(api_key, APIKey) + assert api_key.id != "" + assert api_key.name == api_key_name + + api_key.delete() + + +def test_list_api_keys(): + api_keys = APIKeyFactory.list() + assert isinstance(api_keys, list) + + for api_key in api_keys: + assert isinstance(api_key, APIKey) + assert api_key.id != "" + + +def test_create_api_key_wrong_input(): + api_key_name = "Test API Key" + + with pytest.raises(Exception): + APIKeyFactory.create( + 
name=api_key_name, + asset_limits="invalid_limits", + global_limits="invalid_limits", + budget=-1000, + expires_at="invalid_date", + ) diff --git a/tests/unit/api_key_test.py b/tests/unit/api_key_test.py new file mode 100644 index 00000000..fa610cae --- /dev/null +++ b/tests/unit/api_key_test.py @@ -0,0 +1,67 @@ +__author__ = "aixplain" +from aixplain.modules import APIKeyGlobalLimits +from datetime import datetime +import requests_mock +import aixplain.utils.config as config +from aixplain.factories.api_key_factory import APIKeyFactory +import json + + +def read_data(data_path): + return json.load(open(data_path, "r")) + + +def test_api_key_service(): + with requests_mock.Mocker() as mock: + model_id = "640b517694bf816d35a59125" + model_url = f"{config.BACKEND_URL}/sdk/models/{model_id}" + model_map = read_data("tests/unit/mock_responses/model_response.json") + mock.get(model_url, json=model_map) + + create_url = f"{config.BACKEND_URL}/sdk/api-keys" + api_key_response = { + "id": "key-id", + "name": "Name", + "accessKey": "access-key", + "budget": 1000, + "globalLimits": {"tpm": 100, "tpd": 1000, "rpd": 1000, "rpm": 100}, + "assetLimits": [{"model": model_id, "tpm": 100, "tpd": 1000, "rpd": 1000, "rpm": 100}], + "expiresAt": "2024-10-07T00:00:00Z", + "isAdmin": False, + } + mock.post(create_url, json=api_key_response) + + api_key = APIKeyFactory.create( + name="Test API Key", + asset_limits=[ + APIKeyGlobalLimits( + model=model_id, token_per_minute=100, token_per_day=1000, request_per_day=1000, request_per_minute=100 + ) + ], + global_limits=APIKeyGlobalLimits( + token_per_minute=100, token_per_day=1000, request_per_day=1000, request_per_minute=100 + ), + budget=1000, + expires_at=datetime(2024, 10, 7), + ) + + assert api_key.id == api_key_response["id"] + assert api_key.access_key == api_key_response["accessKey"] + assert api_key.budget == api_key_response["budget"] + assert api_key.expires_at == api_key_response["expiresAt"] + + # List test + list_url = f"{config.BACKEND_URL}/sdk/api-keys" + mock.get(list_url, json=[api_key_response]) + + api_keys = APIKeyFactory.list() + + assert len(api_keys) == 1 + assert api_keys[0].id == api_key_response["id"] + assert api_keys[0].access_key == api_key_response["accessKey"] + + # Delete Test: + delete_url = f"{config.BACKEND_URL}/sdk/api-keys/{api_key.id}" + mock.delete(delete_url, status_code=200) + + api_key.delete() From 63e0f8278dfd081bc8c807c7543eafe7a3d00412 Mon Sep 17 00:00:00 2001 From: kadirpekel Date: Mon, 7 Oct 2024 17:15:54 +0200 Subject: [PATCH 043/105] BUG-177: Fixed pipeline validation (#262) * BUG-177: Fixed pipeline validation * BUG-177 rephrased exception message --- aixplain/modules/pipeline/designer/pipeline.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/aixplain/modules/pipeline/designer/pipeline.py b/aixplain/modules/pipeline/designer/pipeline.py index b2ebd19b..d0522038 100644 --- a/aixplain/modules/pipeline/designer/pipeline.py +++ b/aixplain/modules/pipeline/designer/pipeline.py @@ -16,6 +16,7 @@ BareMetric ) from .enums import NodeType, RouteType, Operation +from .mixins import OutputableMixin T = TypeVar("T", bound="AssetNode") @@ -99,7 +100,7 @@ def validate_nodes(self): raise ValueError(f"Output node {node.label} not linked in") # validate rest of the nodes are linked in and out else: - if isinstance(node, AssetNode): + if isinstance(node, OutputableMixin): contains_asset = True if node.number not in link_from_map: raise ValueError(f"Node {node.label} not linked in") @@ -108,7 +109,7 @@ def 
validate_nodes(self): if not contains_input or not contains_output or not contains_asset: raise ValueError( - "Pipeline must contain at least one input, output and asset node" # noqa + "The pipeline requires at least one asset or script node, along with both input and output nodes." # noqa ) def is_param_linked(self, node, param): From 0208be12c2773f65d589cbb6c518e0ce7b9500f6 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Tue, 8 Oct 2024 10:04:12 -0400 Subject: [PATCH 044/105] Get usage limit (#264) --- aixplain/modules/__init__.py | 2 +- aixplain/modules/api_key.py | 48 ++++++++++++++++++++++++++--- tests/functional/apikey/test_api.py | 5 ++- 3 files changed, 49 insertions(+), 6 deletions(-) diff --git a/aixplain/modules/__init__.py b/aixplain/modules/__init__.py index 6ac5ae9e..d49e29d4 100644 --- a/aixplain/modules/__init__.py +++ b/aixplain/modules/__init__.py @@ -36,4 +36,4 @@ from .agent import Agent from .agent.tool import Tool from .team_agent import TeamAgent -from .api_key import APIKey, APIKeyGlobalLimits +from .api_key import APIKey, APIKeyGlobalLimits, APIKeyUsageLimit diff --git a/aixplain/modules/api_key.py b/aixplain/modules/api_key.py index 15b1bb68..82328dc4 100644 --- a/aixplain/modules/api_key.py +++ b/aixplain/modules/api_key.py @@ -1,3 +1,6 @@ +import logging +from aixplain.utils import config +from aixplain.utils.file_utils import _request_with_retry from aixplain.modules import Model from datetime import datetime from typing import Dict, List, Optional, Text, Union @@ -23,6 +26,22 @@ def __init__( self.model = ModelFactory.get(model) +class APIKeyUsageLimit: + def __init__(self, request_count: int, request_count_limit: int, token_count: int, token_count_limit: int): + """Get the usage limits of an API key + + Args: + request_count (int): number of requests made + request_count_limit (int): limit of requests + token_count (int): number of tokens used + token_count_limit (int): limit of tokens + """ + self.request_count = request_count + self.request_count_limit = request_count_limit + self.token_count = token_count + self.token_count_limit = token_count_limit + + class APIKey: def __init__( self, @@ -117,10 +136,6 @@ def to_dict(self) -> Dict: def delete(self) -> None: """Delete an API key by its ID""" - import logging - from aixplain.utils import config - from aixplain.utils.file_utils import _request_with_retry - try: url = f"{config.BACKEND_URL}/sdk/api-keys/{self.id}" headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} @@ -132,3 +147,28 @@ def delete(self) -> None: message = "API Key Deletion Error: Make sure the API Key exists and you are the owner." logging.error(message) raise Exception(f"{message}") + + def get_usage(self, asset_id: Optional[Text] = None) -> APIKeyUsageLimit: + """Get the usage limits of an API key""" + try: + url = f"{config.BACKEND_URL}/sdk/api-keys/{self.id}/usage-limits" + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + logging.info(f"Start service for GET API Key Usage - {url} - {headers}") + if asset_id is not None: + url += f"?assetId={asset_id}" + r = _request_with_retry("GET", url, headers=headers) + resp = r.json() + except Exception: + message = "API Key Usage Error: Make sure the API Key exists and you are the owner." 
+ logging.error(message) + raise Exception(f"{message}") + + if 200 <= r.status_code < 300: + return APIKeyUsageLimit( + request_count=resp["requestCount"], + request_count_limit=resp["requestCountLimit"], + token_count=resp["tokenCount"], + token_count_limit=resp["tokenCountLimit"], + ) + else: + raise Exception(f"API Key Usage Error: Failed to get usage. Error: {str(resp)}") diff --git a/tests/functional/apikey/test_api.py b/tests/functional/apikey/test_api.py index 31094a59..801a1243 100644 --- a/tests/functional/apikey/test_api.py +++ b/tests/functional/apikey/test_api.py @@ -1,5 +1,5 @@ from aixplain.factories.api_key_factory import APIKeyFactory -from aixplain.modules import APIKey, APIKeyGlobalLimits +from aixplain.modules import APIKey, APIKeyGlobalLimits, APIKeyUsageLimit from datetime import datetime import json import pytest @@ -81,6 +81,9 @@ def test_list_api_keys(): assert isinstance(api_key, APIKey) assert api_key.id != "" + usage = api_key.get_usage() + assert isinstance(usage, APIKeyUsageLimit) + def test_create_api_key_wrong_input(): api_key_name = "Test API Key" From 662420eccb3c5f052d4ed8c9700995a1d6bfa3e3 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Wed, 9 Oct 2024 12:42:30 -0300 Subject: [PATCH 045/105] Update SDK version (#266) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index be397bdd..44f4d45e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ namespaces = true [project] name = "aiXplain" -version = "0.2.18" +version = "0.2.20" description = "aiXplain SDK adds AI functions to software." readme = "README.md" requires-python = ">=3.5, <4" From 828bdee313d28eb48e02586e2cd1641eeb0428d2 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Wed, 9 Oct 2024 17:30:31 -0300 Subject: [PATCH 046/105] Eng 739 get api key (#268) * Get usage limit * Get usage limit of an api key service * Fix asset limits * Fixing usage limit services --- aixplain/factories/api_key_factory.py | 32 ++++++++++++++++++++++++--- aixplain/modules/api_key.py | 4 ++-- tests/functional/apikey/test_api.py | 5 +++-- tests/unit/api_key_test.py | 2 +- 4 files changed, 35 insertions(+), 8 deletions(-) diff --git a/aixplain/factories/api_key_factory.py b/aixplain/factories/api_key_factory.py index 750ce0b2..f5d0a6b2 100644 --- a/aixplain/factories/api_key_factory.py +++ b/aixplain/factories/api_key_factory.py @@ -2,9 +2,9 @@ import logging import aixplain.utils.config as config from datetime import datetime -from typing import Text, List, Dict, Union +from typing import Text, List, Optional, Dict, Union from aixplain.utils.file_utils import _request_with_retry -from aixplain.modules.api_key import APIKey, APIKeyGlobalLimits +from aixplain.modules.api_key import APIKey, APIKeyGlobalLimits, APIKeyUsageLimit class APIKeyFactory: @@ -72,7 +72,7 @@ def create( name=resp["name"], budget=resp["budget"] if "budget" in resp else None, global_limits=resp["globalLimits"] if "globalLimits" in resp else None, - asset_limits=resp["assetLimits"] if "assetLimits" in resp else [], + asset_limits=resp["assetsLimits"] if "assetsLimits" in resp else [], expires_at=resp["expiresAt"] if "expiresAt" in resp else None, access_key=resp["accessKey"], is_admin=resp["isAdmin"], @@ -110,3 +110,29 @@ def update(cls, api_key: APIKey) -> APIKey: return api_key else: raise Exception(f"API Key Update Error: Failed to update API key 
with ID {api_key.id}. Error: {str(resp)}") + + @classmethod + def get_usage_limit(cls, api_key: Text = config.TEAM_API_KEY, asset_id: Optional[Text] = None) -> APIKeyUsageLimit: + """Get API key usage limit""" + try: + url = f"{config.BACKEND_URL}/sdk/api-keys/usage-limits" + if asset_id is not None: + url += f"?assetId={asset_id}" + headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"} + logging.info(f"Start service for GET API Key Usage - {url} - {headers}") + r = _request_with_retry("GET", url, headers=headers) + resp = r.json() + except Exception: + message = "API Key Usage Error: Make sure the API Key exists and you are the owner." + logging.error(message) + raise Exception(f"{message}") + + if 200 <= r.status_code < 300: + return APIKeyUsageLimit( + request_count=resp["requestCount"], + request_count_limit=resp["requestCountLimit"], + token_count=resp["tokenCount"], + token_count_limit=resp["tokenCountLimit"], + ) + else: + raise Exception(f"API Key Usage Error: Failed to get usage. Error: {str(resp)}") diff --git a/aixplain/modules/api_key.py b/aixplain/modules/api_key.py index 82328dc4..884735b6 100644 --- a/aixplain/modules/api_key.py +++ b/aixplain/modules/api_key.py @@ -73,7 +73,7 @@ def __init__( token_per_day=asset_limit["tpd"], request_per_minute=asset_limit["rpm"], request_per_day=asset_limit["rpd"], - model=asset_limit["model"], + model=asset_limit["assetId"], ) self.expires_at = expires_at self.access_key = access_key @@ -129,7 +129,7 @@ def to_dict(self) -> Dict: "tpd": asset_limit.token_per_day, "rpm": asset_limit.request_per_minute, "rpd": asset_limit.request_per_day, - "model": asset_limit.model.id, + "assetId": asset_limit.model.id, } ) return payload diff --git a/tests/functional/apikey/test_api.py b/tests/functional/apikey/test_api.py index 801a1243..49189d16 100644 --- a/tests/functional/apikey/test_api.py +++ b/tests/functional/apikey/test_api.py @@ -81,8 +81,9 @@ def test_list_api_keys(): assert isinstance(api_key, APIKey) assert api_key.id != "" - usage = api_key.get_usage() - assert isinstance(usage, APIKeyUsageLimit) + if api_key.is_admin is False: + usage = api_key.get_usage() + assert isinstance(usage, APIKeyUsageLimit) def test_create_api_key_wrong_input(): diff --git a/tests/unit/api_key_test.py b/tests/unit/api_key_test.py index fa610cae..60d2371d 100644 --- a/tests/unit/api_key_test.py +++ b/tests/unit/api_key_test.py @@ -25,7 +25,7 @@ def test_api_key_service(): "accessKey": "access-key", "budget": 1000, "globalLimits": {"tpm": 100, "tpd": 1000, "rpd": 1000, "rpm": 100}, - "assetLimits": [{"model": model_id, "tpm": 100, "tpd": 1000, "rpd": 1000, "rpm": 100}], + "assetLimits": [{"assetId": model_id, "tpm": 100, "tpd": 1000, "rpd": 1000, "rpm": 100}], "expiresAt": "2024-10-07T00:00:00Z", "isAdmin": False, } From b13c21deae8167114a6463dd0a3febf07cdd7728 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Thu, 10 Oct 2024 12:16:48 -0300 Subject: [PATCH 047/105] Fix Update API Key Bug (#272) --- aixplain/factories/api_key_factory.py | 4 +- aixplain/modules/api_key.py | 7 +- pyproject.toml | 2 +- tests/functional/apikey/test_api.py | 106 ++++++++++++++++++++++++++ 4 files changed, 114 insertions(+), 5 deletions(-) diff --git a/aixplain/factories/api_key_factory.py b/aixplain/factories/api_key_factory.py index f5d0a6b2..4ac8f00a 100644 --- a/aixplain/factories/api_key_factory.py +++ b/aixplain/factories/api_key_factory.py @@ -85,6 +85,7 @@ def create( def update(cls, 
api_key: APIKey) -> APIKey: """Update an existing API key""" try: + resp = "Unspecified error" url = f"{cls.backend_url}/sdk/api-keys/{api_key.id}" headers = {"Content-Type": "application/json", "Authorization": f"Token {config.TEAM_API_KEY}"} payload = api_key.to_dict() @@ -95,14 +96,13 @@ def update(cls, api_key: APIKey) -> APIKey: except Exception as e: raise Exception(f"API Key Update Error: Failed to update API key with ID {id}. Error: {str(e)}") - resp = "Unspecified error" if 200 <= r.status_code < 300: api_key = APIKey( id=resp["id"], name=resp["name"], budget=resp["budget"] if "budget" in resp else None, global_limits=resp["globalLimits"] if "globalLimits" in resp else None, - asset_limits=resp["assetLimits"] if "assetLimits" in resp else [], + asset_limits=resp["assetsLimits"] if "assetsLimits" in resp else [], expires_at=resp["expiresAt"] if "expiresAt" in resp else None, access_key=resp["accessKey"], is_admin=resp["isAdmin"], diff --git a/aixplain/modules/api_key.py b/aixplain/modules/api_key.py index 884735b6..886b0dab 100644 --- a/aixplain/modules/api_key.py +++ b/aixplain/modules/api_key.py @@ -46,7 +46,7 @@ class APIKey: def __init__( self, name: Text, - expires_at: Union[datetime, Text], + expires_at: Optional[Union[datetime, Text]] = None, budget: Optional[float] = None, asset_limits: List[APIKeyGlobalLimits] = [], global_limits: Optional[Union[Dict, APIKeyGlobalLimits]] = None, @@ -111,9 +111,12 @@ def to_dict(self) -> Dict: "name": self.name, "budget": self.budget, "assetLimits": [], - "expiresAt": self.expires_at.strftime("%Y-%m-%dT%H:%M:%S.%fZ"), + "expiresAt": self.expires_at, } + if self.expires_at is not None and isinstance(self.expires_at, datetime): + payload["expiresAt"] = self.expires_at.strftime("%Y-%m-%dT%H:%M:%S.%fZ") + if self.global_limits is not None: payload["globalLimits"] = { "tpm": self.global_limits.token_per_minute, diff --git a/pyproject.toml b/pyproject.toml index 44f4d45e..e0df02a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ namespaces = true [project] name = "aiXplain" -version = "0.2.20" +version = "0.2.21rc0" description = "aiXplain SDK adds AI functions to software." 
readme = "README.md" requires-python = ">=3.5, <4" diff --git a/tests/functional/apikey/test_api.py b/tests/functional/apikey/test_api.py index 49189d16..80b75189 100644 --- a/tests/functional/apikey/test_api.py +++ b/tests/functional/apikey/test_api.py @@ -73,6 +73,57 @@ def test_create_api_key_from_dict(): api_key.delete() +def test_create_update_api_key_from_dict(): + api_key_dict = { + "asset_limits": [ + { + "model": "640b517694bf816d35a59125", + "token_per_minute": 100, + "token_per_day": 1000, + "request_per_day": 1000, + "request_per_minute": 100, + } + ], + "global_limits": {"token_per_minute": 100, "token_per_day": 1000, "request_per_day": 1000, "request_per_minute": 100}, + "budget": 1000, + "expires_at": "2024-12-12T00:00:00Z", + } + + api_key_name = "Test API Key" + api_key = APIKeyFactory.create( + name=api_key_name, + asset_limits=[APIKeyGlobalLimits(**limit) for limit in api_key_dict["asset_limits"]], + global_limits=APIKeyGlobalLimits(**api_key_dict["global_limits"]), + budget=api_key_dict["budget"], + expires_at=datetime.strptime(api_key_dict["expires_at"], "%Y-%m-%dT%H:%M:%SZ"), + ) + + assert isinstance(api_key, APIKey) + assert api_key.id != "" + assert api_key.name == api_key_name + + api_key.global_limits.token_per_day = 222 + api_key.global_limits.token_per_minute = 222 + api_key.global_limits.request_per_day = 222 + api_key.global_limits.request_per_minute = 222 + api_key.asset_limits[0].request_per_day = 222 + api_key.asset_limits[0].request_per_minute = 222 + api_key.asset_limits[0].token_per_day = 222 + api_key.asset_limits[0].token_per_minute = 222 + api_key = APIKeyFactory.update(api_key) + + assert api_key.global_limits.token_per_day == 222 + assert api_key.global_limits.token_per_minute == 222 + assert api_key.global_limits.request_per_day == 222 + assert api_key.global_limits.request_per_minute == 222 + assert api_key.asset_limits[0].request_per_day == 222 + assert api_key.asset_limits[0].request_per_minute == 222 + assert api_key.asset_limits[0].token_per_day == 222 + assert api_key.asset_limits[0].token_per_minute == 222 + + api_key.delete() + + def test_list_api_keys(): api_keys = APIKeyFactory.list() assert isinstance(api_keys, list) @@ -86,6 +137,61 @@ def test_list_api_keys(): assert isinstance(usage, APIKeyUsageLimit) +def test_list_update_api_keys(): + api_keys = APIKeyFactory.list() + assert isinstance(api_keys, list) + + for api_key in api_keys: + assert isinstance(api_key, APIKey) + assert api_key.id != "" + + from random import randint + + number = randint(0, 10000) + if api_key.global_limits is None: + api_key.global_limits = APIKeyGlobalLimits( + token_per_minute=number, + token_per_day=number, + request_per_day=number, + request_per_minute=number, + ) + else: + api_key.global_limits.token_per_day = number + api_key.global_limits.token_per_minute = number + api_key.global_limits.request_per_day = number + api_key.global_limits.request_per_minute = number + + if api_key.asset_limits is None: + api_key.asset_limits = [] + + if len(api_key.asset_limits) == 0: + api_key.asset_limits.append( + APIKeyGlobalLimits( + model="640b517694bf816d35a59125", + token_per_minute=number, + token_per_day=number, + request_per_day=number, + request_per_minute=number, + ) + ) + else: + api_key.asset_limits[0].request_per_day = number + api_key.asset_limits[0].request_per_minute = number + api_key.asset_limits[0].token_per_day = number + api_key.asset_limits[0].token_per_minute = number + api_key = APIKeyFactory.update(api_key) + + assert 
api_key.global_limits.token_per_day == number + assert api_key.global_limits.token_per_minute == number + assert api_key.global_limits.request_per_day == number + assert api_key.global_limits.request_per_minute == number + assert api_key.asset_limits[0].request_per_day == number + assert api_key.asset_limits[0].request_per_minute == number + assert api_key.asset_limits[0].token_per_day == number + assert api_key.asset_limits[0].token_per_minute == number + break + + def test_create_api_key_wrong_input(): api_key_name = "Test API Key" From 81822939cf2875d82cbf171ca929157395497a40 Mon Sep 17 00:00:00 2001 From: Zaina Abu Shaban Date: Wed, 16 Oct 2024 18:25:58 +0300 Subject: [PATCH 048/105] Eng 735 ai xplain sdk improve error log messages (#271) * Improve error log for: Benchmark, Corpus, Dataset, Model, Pipeline * Fixed issue + Added test * Added required changes to error prompts * Small improvements --------- Co-authored-by: xainaz Co-authored-by: Thiago Castro Ferreira --- aixplain/factories/benchmark_factory.py | 55 +++++++---- aixplain/factories/corpus_factory.py | 91 ++++++++++++------- aixplain/factories/dataset_factory.py | 72 +++++++++------ aixplain/factories/model_factory.py | 76 +++++++++------- .../factories/pipeline_factory/__init__.py | 65 ++++++++----- tests/unit/benchmark_test.py | 70 ++++++++++++++ tests/unit/corpus_test.py | 34 +++++++ tests/unit/dataset_test.py | 34 +++++++ tests/unit/model_test.py | 42 +++++++++ tests/unit/pipeline_test.py | 50 ++++++++-- 10 files changed, 443 insertions(+), 146 deletions(-) create mode 100644 tests/unit/benchmark_test.py create mode 100644 tests/unit/corpus_test.py create mode 100644 tests/unit/dataset_test.py diff --git a/aixplain/factories/benchmark_factory.py b/aixplain/factories/benchmark_factory.py index 57d4a833..305fb5d9 100644 --- a/aixplain/factories/benchmark_factory.py +++ b/aixplain/factories/benchmark_factory.py @@ -22,10 +22,8 @@ """ import logging -from typing import Dict, List, Optional, Text +from typing import Dict, List, Text import json -import pandas as pd -from pathlib import Path from aixplain.enums.supplier import Supplier from aixplain.modules import Dataset, Metric, Model from aixplain.modules.benchmark_job import BenchmarkJob @@ -34,9 +32,8 @@ from aixplain.factories.dataset_factory import DatasetFactory from aixplain.factories.model_factory import ModelFactory from aixplain.utils import config -from aixplain.utils.file_utils import _request_with_retry, save_file +from aixplain.utils.file_utils import _request_with_retry from urllib.parse import urljoin -from warnings import warn class BenchmarkFactory: @@ -117,7 +114,7 @@ def get(cls, benchmark_id: str) -> Benchmark: logging.info(f"Start service for GET Benchmark - {url} - {headers}") r = _request_with_retry("get", url, headers=headers) resp = r.json() - benchmark = cls._create_benchmark_from_response(resp) + except Exception as e: status_code = 400 if resp is not None and "statusCode" in resp: @@ -125,10 +122,17 @@ def get(cls, benchmark_id: str) -> Benchmark: message = resp["message"] message = f"Benchmark Creation: Status {status_code} - {message}" else: - message = f"Benchmark Creation: Unspecified Error" + message = "Benchmark Creation: Unspecified Error" logging.error(f"Benchmark Creation Failed: {e}") raise Exception(f"Status {status_code}: {message}") - return benchmark + if 200 <= r.status_code < 300: + benchmark = cls._create_benchmark_from_response(resp) + logging.info(f"Benchmark {benchmark_id} retrieved successfully.") + return benchmark + else: + 
error_message = f"Benchmark GET Error: Status {r.status_code} - {resp}" + logging.error(error_message) + raise Exception(error_message) @classmethod def get_job(cls, job_id: Text) -> BenchmarkJob: @@ -189,7 +193,7 @@ def create(cls, name: str, dataset_list: List[Dataset], model_list: List[Model], """ payload = {} try: - url = urljoin(cls.backend_url, f"sdk/benchmarks") + url = urljoin(cls.backend_url, "sdk/benchmarks") headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} payload = { "name": name, @@ -204,12 +208,19 @@ def create(cls, name: str, dataset_list: List[Dataset], model_list: List[Model], payload = json.dumps(clean_payload) r = _request_with_retry("post", url, headers=headers, data=payload) resp = r.json() - logging.info(f"Creating Benchmark Job: Status for {name}: {resp}") - return cls.get(resp["id"]) + except Exception as e: error_message = f"Creating Benchmark Job: Error in Creating Benchmark with payload {payload} : {e}" logging.error(error_message, exc_info=True) - return None + raise Exception(error_message) + + if 200 <= r.status_code < 300: + logging.info(f"Benchmark {name} created successfully.") + return cls.get(resp["id"]) + else: + error_message = f"Benchmark Creation Error: Status {r.status_code} - {resp}" + logging.error(error_message) + raise Exception(error_message) @classmethod def list_normalization_options(cls, metric: Metric, model: Model) -> List[str]: @@ -223,7 +234,7 @@ def list_normalization_options(cls, metric: Metric, model: Model) -> List[str]: List[str]: List of supported normalization options """ try: - url = urljoin(cls.backend_url, f"sdk/benchmarks/normalization-options") + url = urljoin(cls.backend_url, "sdk/benchmarks/normalization-options") if cls.aixplain_key != "": headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} else: @@ -231,13 +242,20 @@ def list_normalization_options(cls, metric: Metric, model: Model) -> List[str]: payload = json.dumps({"metricId": metric.id, "modelIds": [model.id]}) r = _request_with_retry("post", url, headers=headers, data=payload) resp = r.json() - logging.info(f"Listing Normalization Options: Status of listing options: {resp}") - normalization_options = [item["value"] for item in resp] - return normalization_options + except Exception as e: error_message = f"Listing Normalization Options: Error in getting Normalization Options: {e}" logging.error(error_message, exc_info=True) - return [] + raise Exception(error_message) + + if 200 <= r.status_code < 300: + logging.info("Listing Normalization Options: ") + normalization_options = [item["value"] for item in resp] + return normalization_options + else: + error_message = f"Error listing normalization options: Status {r.status_code} - {resp}" + logging.error(error_message) + raise Exception(error_message) @classmethod def get_benchmark_job_scores(cls, job_id): @@ -255,7 +273,8 @@ def __get_model_name(model_id): if model.version is not None: name = f"{name}({model.version})" return name + benchmarkJob = cls.get_job(job_id) scores_df = benchmarkJob.get_scores() scores_df["Model"] = scores_df["Model"].apply(lambda x: __get_model_name(x)) - return scores_df \ No newline at end of file + return scores_df diff --git a/aixplain/factories/corpus_factory.py b/aixplain/factories/corpus_factory.py index 1f81ac4d..3b9c5e4b 100644 --- a/aixplain/factories/corpus_factory.py +++ b/aixplain/factories/corpus_factory.py @@ -21,7 +21,6 @@ Corpus Factory Class """ -import aixplain.utils.config as config import 
aixplain.processes.data_onboarding.onboard_functions as onboard_functions import json import logging @@ -86,12 +85,12 @@ def __from_response(cls, response: Dict) -> Corpus: try: license = License(response["license"]["typeId"]) - except: + except Exception: license = None try: length = int(response["segmentsCount"]) - except: + except Exception: length = None corpus = Corpus( @@ -116,17 +115,27 @@ def get(cls, corpus_id: Text) -> Corpus: Returns: Corpus: Created 'Corpus' object """ - url = urljoin(cls.backend_url, f"sdk/corpora/{corpus_id}/overview") - if cls.aixplain_key != "": - headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} + try: + url = urljoin(cls.backend_url, f"sdk/corpora/{corpus_id}/overview") + if cls.aixplain_key != "": + headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} + else: + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + logging.info(f"Start service for GET Corpus - {url} - {headers}") + r = _request_with_retry("get", url, headers=headers) + resp = r.json() + + except Exception as e: + error_message = f"Error retrieving Corpus {corpus_id}: {str(e)}" + logging.error(error_message, exc_info=True) + raise Exception(error_message) + if 200 <= r.status_code < 300: + logging.info(f"Corpus {corpus_id} retrieved successfully.") + return cls.__from_response(resp) else: - headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} - logging.info(f"Start service for GET Corpus - {url} - {headers}") - r = _request_with_retry("get", url, headers=headers) - resp = r.json() - if "statusCode" in resp and resp["statusCode"] == 404: - raise Exception(f"Corpus GET Error: Dataset {corpus_id} not found.") - return cls.__from_response(resp) + error_message = f"Corpus GET Error: Status {r.status_code} - {resp}" + logging.error(error_message) + raise Exception(error_message) @classmethod def create_asset_from_id(cls, corpus_id: Text) -> Corpus: @@ -168,7 +177,7 @@ def list( else: headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} - assert 0 < page_size <= 100, f"Corpus List Error: Page size must be greater than 0 and not exceed 100." + assert 0 < page_size <= 100, "Corpus List Error: Page size must be greater than 0 and not exceed 100." 
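The get/list rewrites in this patch repeat one request-then-check shape across the benchmark, corpus, dataset, model, and pipeline factories. A minimal sketch of that shared shape as a helper follows; the helper name and placement are hypothetical, not part of this change, and `r` is assumed to be a requests-style response object:

import logging
from typing import Any, Dict


def _raise_on_error(r: Any, resp: Dict, context: str) -> Dict:
    # Mirror the pattern used above: pass 2xx bodies through,
    # otherwise log and raise with the status code and payload.
    if 200 <= r.status_code < 300:
        logging.info(f"{context} succeeded.")
        return resp
    error_message = f"{context} Error: Status {r.status_code} - {resp}"
    logging.error(error_message)
    raise Exception(error_message)
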
payload = {"pageSize": page_size, "pageNumber": page_number, "sort": [{"field": "createdAt", "dir": -1}]} if query is not None: @@ -188,26 +197,38 @@ def list( language = [language] payload["language"] = [lng.value["language"] for lng in language] - logging.info(f"Start service for POST List Corpus - {url} - {headers} - {json.dumps(payload)}") - r = _request_with_retry("post", url, headers=headers, json=payload) - resp = r.json() - corpora, page_total, total = [], 0, 0 - if "results" in resp: - results = resp["results"] - page_total = resp["pageTotal"] - total = resp["total"] - logging.info(f"Response for POST List Corpus - Page Total: {page_total} / Total: {total}") - for corpus in results: - corpus_ = cls.__from_response(corpus) - # add languages - languages = [] - for lng in corpus["languages"]: - if "dialect" not in lng: - lng["dialect"] = "" - languages.append(Language(lng)) - corpus_.kwargs["languages"] = languages - corpora.append(corpus_) - return {"results": corpora, "page_total": page_total, "page_number": page_number, "total": total} + try: + logging.info(f"Start service for POST List Corpus - {url} - {headers} - {json.dumps(payload)}") + r = _request_with_retry("post", url, headers=headers, json=payload) + resp = r.json() + + except Exception as e: + error_message = f"Error listing corpora: {str(e)}" + logging.error(error_message, exc_info=True) + raise Exception(error_message) + + if 200 <= r.status_code < 300: + corpora, page_total, total = [], 0, 0 + if "results" in resp: + results = resp["results"] + page_total = resp["pageTotal"] + total = resp["total"] + logging.info(f"Response for POST List Corpus - Page Total: {page_total} / Total: {total}") + for corpus in results: + corpus_ = cls.__from_response(corpus) + # add languages + languages = [] + for lng in corpus["languages"]: + if "dialect" not in lng: + lng["dialect"] = "" + languages.append(Language(lng)) + corpus_.kwargs["languages"] = languages + corpora.append(corpus_) + return {"results": corpora, "page_total": page_total, "page_number": page_number, "total": total} + else: + error_message = f"Corpus List Error: Status {r.status_code} - {resp}" + logging.error(error_message) + raise Exception(error_message) @classmethod def get_assets_from_page( @@ -245,7 +266,7 @@ def create( functions: List[Function] = [], privacy: Privacy = Privacy.PRIVATE, error_handler: ErrorHandler = ErrorHandler.SKIP, - api_key: Optional[Text] = None + api_key: Optional[Text] = None, ) -> Dict: """Asynchronous call to Upload a corpus to the user's dashboard. 
diff --git a/aixplain/factories/dataset_factory.py b/aixplain/factories/dataset_factory.py index 5e69d572..081513c0 100644 --- a/aixplain/factories/dataset_factory.py +++ b/aixplain/factories/dataset_factory.py @@ -21,7 +21,6 @@ Dataset Factory Class """ -import aixplain.utils.config as config import aixplain.processes.data_onboarding.onboard_functions as onboard_functions import json import os @@ -49,7 +48,6 @@ from typing import Any, Dict, List, Optional, Text, Union from urllib.parse import urljoin from uuid import uuid4 -from warnings import warn class DatasetFactory(AssetFactory): @@ -122,7 +120,7 @@ def __from_response(cls, response: Dict) -> Dataset: target_data_list = [data[data_id] for data_id in out["dataIds"]] data_name = target_data_list[0].name target_data[data_name] = target_data_list - except: + except Exception: pass # process function @@ -164,17 +162,27 @@ def get(cls, dataset_id: Text) -> Dataset: Returns: Dataset: Created 'Dataset' object """ - url = urljoin(cls.backend_url, f"sdk/datasets/{dataset_id}/overview") - if cls.aixplain_key != "": - headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} + try: + url = urljoin(cls.backend_url, f"sdk/datasets/{dataset_id}/overview") + if cls.aixplain_key != "": + headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} + else: + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + logging.info(f"Start service for GET Dataset - {url} - {headers}") + r = _request_with_retry("get", url, headers=headers) + resp = r.json() + + except Exception as e: + error_message = f"Error retrieving Dataset {dataset_id}: {str(e)}" + logging.error(error_message, exc_info=True) + raise Exception(error_message) + if 200 <= r.status_code < 300: + logging.info(f"Dataset {dataset_id} retrieved successfully.") + return cls.__from_response(resp) else: - headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} - logging.info(f"Start service for GET Dataset - {url} - {headers}") - r = _request_with_retry("get", url, headers=headers) - resp = r.json() - if "statusCode" in resp and resp["statusCode"] == 404: - raise Exception(f"Dataset GET Error: Dataset {dataset_id} not found.") - return cls.__from_response(resp) + error_message = f"Dataset GET Error: Status {r.status_code} - {resp}" + logging.error(error_message) + raise Exception(error_message) @classmethod def list( @@ -211,7 +219,7 @@ def list( else: headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} - assert 0 < page_size <= 100, f"Dataset List Error: Page size must be greater than 0 and not exceed 100." + assert 0 < page_size <= 100, "Dataset List Error: Page size must be greater than 0 and not exceed 100." 
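The page-size assertion retained in this hunk caps any single listing call at 100 records, so larger collections have to be read page by page. A minimal paging sketch using the response fields this method returns; the query value is illustrative:

from aixplain.factories import DatasetFactory

page_number, datasets = 0, []
while True:
    page = DatasetFactory.list(query="test_query", page_number=page_number, page_size=100)
    datasets.extend(page["results"])
    # "total" counts all matches; stop once every page has been consumed.
    if len(datasets) >= page["total"] or not page["results"]:
        break
    page_number += 1
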
payload = { "pageSize": page_size, "pageNumber": page_number, @@ -245,19 +253,29 @@ def list( target_languages = [target_languages] payload["output"]["languages"] = [lng.value["language"] for lng in target_languages] - logging.info(f"Start service for POST List Dataset - {url} - {headers} - {json.dumps(payload)}") - r = _request_with_retry("post", url, headers=headers, json=payload) - resp = r.json() + try: + logging.info(f"Start service for POST List Dataset - {url} - {headers} - {json.dumps(payload)}") + r = _request_with_retry("post", url, headers=headers, json=payload) + resp = r.json() - datasets, page_total, total = [], 0, 0 - if "results" in resp: - results = resp["results"] - page_total = resp["pageTotal"] - total = resp["total"] - logging.info(f"Response for POST List Dataset - Page Total: {page_total} / Total: {total}") - for dataset in results: - datasets.append(cls.__from_response(dataset)) - return {"results": datasets, "page_total": page_total, "page_number": page_number, "total": total} + except Exception as e: + error_message = f"Error listing datasets: {str(e)}" + logging.error(error_message, exc_info=True) + raise Exception(error_message) + if 200 <= r.status_code < 300: + datasets, page_total, total = [], 0, 0 + if "results" in resp: + results = resp["results"] + page_total = resp["pageTotal"] + total = resp["total"] + logging.info(f"Response for POST List Dataset - Page Total: {page_total} / Total: {total}") + for dataset in results: + datasets.append(cls.__from_response(dataset)) + return {"results": datasets, "page_total": page_total, "page_number": page_number, "total": total} + else: + error_message = f"Dataset List Error: Status {r.status_code} - {resp}" + logging.error(error_message) + raise Exception(error_message) @classmethod def create( @@ -282,7 +300,7 @@ def create( error_handler: ErrorHandler = ErrorHandler.SKIP, s3_link: Optional[Text] = None, aws_credentials: Optional[Dict[Text, Text]] = {"AWS_ACCESS_KEY_ID": None, "AWS_SECRET_ACCESS_KEY": None}, - api_key: Optional[Text] = None + api_key: Optional[Text] = None, ) -> Dict: """Dataset Onboard diff --git a/aixplain/factories/model_factory.py b/aixplain/factories/model_factory.py index d82bdd63..5df7c924 100644 --- a/aixplain/factories/model_factory.py +++ b/aixplain/factories/model_factory.py @@ -113,13 +113,7 @@ def get(cls, model_id: Text, api_key: Optional[Text] = None) -> Model: logging.info(f"Start service for GET Model - {url} - {headers}") r = _request_with_retry("get", url, headers=headers) resp = r.json() - # set api key - resp["api_key"] = config.TEAM_API_KEY - if api_key is not None: - resp["api_key"] = api_key - model = cls._create_model_from_response(resp) - logging.info(f"Model Creation: Model {model_id} instantiated.") - return model + except Exception: if resp is not None and "statusCode" in resp: status_code = resp["statusCode"] @@ -129,6 +123,17 @@ def get(cls, model_id: Text, api_key: Optional[Text] = None) -> Model: message = "Model Creation: Unspecified Error" logging.error(message) raise Exception(f"{message}") + if 200 <= r.status_code < 300: + resp["api_key"] = config.TEAM_API_KEY + if api_key is not None: + resp["api_key"] = api_key + model = cls._create_model_from_response(resp) + logging.info(f"Model Creation: Model {model_id} instantiated.") + return model + else: + error_message = f"Model GET Error: Failed to retrieve model {model_id}. Status Code: {r.status_code}. 
Error: {resp}" + logging.error(error_message) + raise Exception(error_message) @classmethod def create_asset_from_id(cls, model_id: Text) -> Model: @@ -198,14 +203,20 @@ def _get_assets_from_page( logging.info(f"Start service for POST Models Paginate - {url} - {headers} - {json.dumps(filter_params)}") r = _request_with_retry("post", url, headers=headers, json=filter_params) resp = r.json() - logging.info(f"Listing Models: Status of getting Models on Page {page_number}: {r.status_code}") - all_models = resp["items"] - model_list = [cls._create_model_from_response(model_info_json) for model_info_json in all_models] - return model_list, resp["total"] + except Exception as e: error_message = f"Listing Models: Error in getting Models on Page {page_number}: {e}" logging.error(error_message, exc_info=True) return [] + if 200 <= r.status_code < 300: + logging.info(f"Listing Models: Status of getting Models on Page {page_number}: {r.status_code}") + all_models = resp["items"] + model_list = [cls._create_model_from_response(model_info_json) for model_info_json in all_models] + return model_list, resp["total"] + else: + error_message = f"Listing Models Error: Failed to retrieve models. Status Code: {r.status_code}. Error: {resp}" + logging.error(error_message) + raise Exception(error_message) @classmethod def list( @@ -237,30 +248,25 @@ def list( Returns: List[Model]: List of models based on given filters """ - try: - models, total = cls._get_assets_from_page( - query, - page_number, - page_size, - function, - suppliers, - source_languages, - target_languages, - is_finetunable, - ownership, - sort_by, - sort_order, - ) - return { - "results": models, - "page_total": min(page_size, len(models)), - "page_number": page_number, - "total": total, - } - except Exception as e: - error_message = f"Listing Models: Error in Listing Models : {e}" - logging.error(error_message, exc_info=True) - raise Exception(error_message) + models, total = cls._get_assets_from_page( + query, + page_number, + page_size, + function, + suppliers, + source_languages, + target_languages, + is_finetunable, + ownership, + sort_by, + sort_order, + ) + return { + "results": models, + "page_total": min(page_size, len(models)), + "page_number": page_number, + "total": total, + } @classmethod def list_host_machines(cls, api_key: Optional[Text] = None) -> List[Dict]: diff --git a/aixplain/factories/pipeline_factory/__init__.py b/aixplain/factories/pipeline_factory/__init__.py index cb4336fe..ef330de0 100644 --- a/aixplain/factories/pipeline_factory/__init__.py +++ b/aixplain/factories/pipeline_factory/__init__.py @@ -78,12 +78,7 @@ def get(cls, pipeline_id: Text, api_key: Optional[Text] = None) -> Pipeline: logging.info(f"Start service for GET Pipeline - {url} - {headers}") r = _request_with_retry("get", url, headers=headers) resp = r.json() - # set api key - resp["api_key"] = config.TEAM_API_KEY - if api_key is not None: - resp["api_key"] = api_key - pipeline = build_from_response(resp, load_architecture=True) - return pipeline + except Exception as e: logging.exception(e) status_code = 400 @@ -95,6 +90,20 @@ def get(cls, pipeline_id: Text, api_key: Optional[Text] = None) -> Pipeline: message = f"Pipeline Creation: Unspecified Error {e}" logging.error(message) raise Exception(f"Status {status_code}: {message}") + if 200 <= r.status_code < 300: + resp["api_key"] = config.TEAM_API_KEY + if api_key is not None: + resp["api_key"] = api_key + pipeline = build_from_response(resp, load_architecture=True) + logging.info(f"Pipeline {pipeline_id} 
retrieved successfully.") + return pipeline + + else: + error_message = ( + f"Pipeline GET Error: Failed to retrieve pipeline {pipeline_id}. Status Code: {r.status_code}. Error: {resp}" + ) + logging.error(error_message) + raise Exception(error_message) @classmethod def create_asset_from_id(cls, pipeline_id: Text) -> Pipeline: @@ -220,23 +229,33 @@ def list( payload["inputDataTypes"] = [data_type.value for data_type in output_data_types] logging.info(f"Start service for POST List Pipeline - {url} - {headers} - {json.dumps(payload)}") - r = _request_with_retry("post", url, headers=headers, json=payload) - resp = r.json() - - pipelines, page_total, total = [], 0, 0 - if "items" in resp: - results = resp["items"] - page_total = resp["pageTotal"] - total = resp["total"] - logging.info(f"Response for POST List Pipeline - Page Total: {page_total} / Total: {total}") - for pipeline in results: - pipelines.append(build_from_response(pipeline)) - return { - "results": pipelines, - "page_total": page_total, - "page_number": page_number, - "total": total, - } + try: + r = _request_with_retry("post", url, headers=headers, json=payload) + resp = r.json() + + except Exception as e: + error_message = f"Pipeline List Error: {str(e)}" + logging.error(error_message, exc_info=True) + raise Exception(error_message) + if 200 <= r.status_code < 300: + pipelines, page_total, total = [], 0, 0 + if "items" in resp: + results = resp["items"] + page_total = resp["pageTotal"] + total = resp["total"] + logging.info(f"Response for POST List Pipeline - Page Total: {page_total} / Total: {total}") + for pipeline in results: + pipelines.append(build_from_response(pipeline)) + return { + "results": pipelines, + "page_total": page_total, + "page_number": page_number, + "total": total, + } + else: + error_message = f"Pipeline List Error: Failed to retrieve pipelines. Status Code: {r.status_code}. 
Error: {resp}" + logging.error(error_message) + raise Exception(error_message) @classmethod def init(cls, name: Text, api_key: Optional[Text] = None) -> Pipeline: diff --git a/tests/unit/benchmark_test.py b/tests/unit/benchmark_test.py new file mode 100644 index 00000000..167e4bcb --- /dev/null +++ b/tests/unit/benchmark_test.py @@ -0,0 +1,70 @@ +import requests_mock +import pytest +from urllib.parse import urljoin +from aixplain.utils import config +from aixplain.factories import MetricFactory, BenchmarkFactory +from aixplain.modules.model import Model +from aixplain.modules.dataset import Dataset + + +def test_create_benchmark_error_response(): + metric_list = [MetricFactory.get("66df3e2d6eb56336b6628171")] + with requests_mock.Mocker() as mock: + name = "test-benchmark" + dataset_list = [ + Dataset( + id="dataset1", + name="Dataset 1", + description="Test dataset", + function="test_func", + source_data="src", + target_data="tgt", + onboard_status="onboarded", + ) + ] + model_list = [ + Model(id="model1", name="Model 1", description="Test model", supplier="Test supplier", cost=10, version="v1") + ] + + url = urljoin(config.BACKEND_URL, "sdk/benchmarks") + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + + error_response = {"statusCode": 400, "message": "Invalid request"} + mock.post(url, headers=headers, json=error_response, status_code=400) + + with pytest.raises(Exception) as excinfo: + BenchmarkFactory.create(name=name, dataset_list=dataset_list, model_list=model_list, metric_list=metric_list) + + assert "Benchmark Creation Error: Status 400 - {'statusCode': 400, 'message': 'Invalid request'}" in str(excinfo.value) + + +def test_get_benchmark_error(): + with requests_mock.Mocker() as mock: + benchmark_id = "test-benchmark-id" + url = urljoin(config.BACKEND_URL, f"sdk/benchmarks/{benchmark_id}") + headers = {"x-aixplain-key": config.AIXPLAIN_API_KEY, "Content-Type": "application/json"} + + error_response = {"statusCode": 404, "message": "Benchmark not found"} + mock.get(url, headers=headers, json=error_response, status_code=404) + + with pytest.raises(Exception) as excinfo: + BenchmarkFactory.get(benchmark_id) + + assert "Benchmark GET Error: Status 404 - {'statusCode': 404, 'message': 'Benchmark not found'}" in str(excinfo.value) + + +def test_list_normalization_options_error(): + metric = MetricFactory.get("66df3e2d6eb56336b6628171") + with requests_mock.Mocker() as mock: + model = Model(id="model1", name="Test Model", description="Test model", supplier="Test supplier", cost=10, version="v1") + + url = urljoin(config.BACKEND_URL, "sdk/benchmarks/normalization-options") + headers = {"x-aixplain-key": config.AIXPLAIN_API_KEY, "Content-Type": "application/json"} + + error_response = {"message": "Internal Server Error"} + mock.post(url, headers=headers, json=error_response, status_code=500) + + with pytest.raises(Exception) as excinfo: + BenchmarkFactory.list_normalization_options(metric, model) + + assert "Error listing normalization options: Status 500 - {'message': 'Internal Server Error'}" in str(excinfo.value) diff --git a/tests/unit/corpus_test.py b/tests/unit/corpus_test.py new file mode 100644 index 00000000..07522c4d --- /dev/null +++ b/tests/unit/corpus_test.py @@ -0,0 +1,34 @@ +from aixplain.factories import CorpusFactory +import pytest +import requests_mock +from urllib.parse import urljoin +from aixplain.utils import config + + +def test_get_corpus_error_response(): + with requests_mock.Mocker() as mock: + corpus_id = 
"invalid_corpus_id" + url = urljoin(config.BACKEND_URL, f"sdk/corpora/{corpus_id}/overview") + headers = {"x-aixplain-key": config.AIXPLAIN_API_KEY, "Content-Type": "application/json"} + + error_response = {"message": "Not Found"} + mock.get(url, headers=headers, json=error_response, status_code=404) + + with pytest.raises(Exception) as excinfo: + CorpusFactory.get(corpus_id=corpus_id) + + assert "Corpus GET Error: Status 404 - {'message': 'Not Found'}" in str(excinfo.value) + + +def test_list_corpus_error_response(): + with requests_mock.Mocker() as mock: + url = urljoin(config.BACKEND_URL, "sdk/corpora/paginate") + headers = {"x-aixplain-key": config.AIXPLAIN_API_KEY, "Content-Type": "application/json"} + + error_response = {"message": "Internal Server Error"} + mock.post(url, headers=headers, json=error_response, status_code=500) + + with pytest.raises(Exception) as excinfo: + CorpusFactory.list(query="test_query", page_number=0, page_size=20) + + assert "Corpus List Error: Status 500 - {'message': 'Internal Server Error'}" in str(excinfo.value) diff --git a/tests/unit/dataset_test.py b/tests/unit/dataset_test.py new file mode 100644 index 00000000..25c57123 --- /dev/null +++ b/tests/unit/dataset_test.py @@ -0,0 +1,34 @@ +import pytest +import requests_mock +from aixplain.factories import DatasetFactory +from urllib.parse import urljoin +from aixplain.utils import config + + +def test_list_dataset_error_response(): + with requests_mock.Mocker() as mock: + url = urljoin(config.BACKEND_URL, "sdk/datasets/paginate") + headers = {"x-aixplain-key": config.AIXPLAIN_API_KEY, "Content-Type": "application/json"} + + error_response = {"message": "Internal Server Error"} + mock.post(url, headers=headers, json=error_response, status_code=500) + + with pytest.raises(Exception) as excinfo: + DatasetFactory.list(query="test_query", page_number=0, page_size=20) + + assert "Dataset List Error: Status 500 - {'message': 'Internal Server Error'}" in str(excinfo.value) + + +def test_get_dataset_error_response(): + with requests_mock.Mocker() as mock: + dataset_id = "invalid_dataset_id" + url = urljoin(config.BACKEND_URL, f"sdk/datasets/{dataset_id}/overview") + headers = {"x-aixplain-key": config.AIXPLAIN_API_KEY, "Content-Type": "application/json"} + + error_response = {"message": "Not Found"} + mock.get(url, headers=headers, json=error_response, status_code=404) + + with pytest.raises(Exception) as excinfo: + DatasetFactory.get(dataset_id=dataset_id) + + assert "Dataset GET Error: Status 404 - {'message': 'Not Found'}" in str(excinfo.value) diff --git a/tests/unit/model_test.py b/tests/unit/model_test.py index c52bb950..a319742c 100644 --- a/tests/unit/model_test.py +++ b/tests/unit/model_test.py @@ -24,6 +24,8 @@ import re from aixplain.utils import config from aixplain.modules import Model +from aixplain.factories import ModelFactory +from aixplain.enums import Function import pytest @@ -82,3 +84,43 @@ def test_run_async_errors(status_code, error_message): response = test_model.run_async(data="input_data") assert response["status"] == "FAILED" assert response["error_message"] == error_message + + +def test_get_model_error_response(): + with requests_mock.Mocker() as mock: + model_id = "test-model-id" + url = urljoin(config.BACKEND_URL, f"sdk/models/{model_id}") + headers = {"x-aixplain-key": config.AIXPLAIN_API_KEY, "Content-Type": "application/json"} + + error_response = {"statusCode": 404, "message": "Model not found"} + mock.get(url, headers=headers, json=error_response, status_code=404) + + with 
pytest.raises(Exception) as excinfo: + ModelFactory.get(model_id) + + assert "Model GET Error: Failed to retrieve model test-model-id" in str(excinfo.value) + + +def test_get_assets_from_page_error(): + with requests_mock.Mocker() as mock: + query = "test-query" + page_number = 0 + page_size = 2 + url = urljoin(config.BACKEND_URL, "sdk/models/paginate") + headers = {"x-aixplain-key": config.AIXPLAIN_API_KEY, "Content-Type": "application/json"} + + error_response = {"statusCode": 500, "message": "Internal Server Error"} + mock.post(url, headers=headers, json=error_response, status_code=500) + + with pytest.raises(Exception) as excinfo: + ModelFactory._get_assets_from_page( + query=query, + page_number=page_number, + page_size=page_size, + function=Function.TEXT_GENERATION, + suppliers=None, + source_languages=None, + target_languages=None, + ) + + assert "Listing Models Error: Failed to retrieve models" in str(excinfo.value) diff --git a/tests/unit/pipeline_test.py b/tests/unit/pipeline_test.py index d3c1c725..05ee7172 100644 --- a/tests/unit/pipeline_test.py +++ b/tests/unit/pipeline_test.py @@ -38,27 +38,61 @@ def test_create_pipeline(): assert hyp_pipeline.id == ref_pipeline.id assert hyp_pipeline.name == ref_pipeline.name + @pytest.mark.parametrize( "status_code,error_message", [ - (401,"Unauthorized API key: Please verify the spelling of the API key and its current validity."), - (465,"Subscription-related error: Please ensure that your subscription is active and has not expired."), - (475,"Billing-related error: Please ensure you have enough credits to run this pipeline. "), - (485, "Supplier-related error: Please ensure that the selected supplier provides the pipeline you are trying to access."), + (401, "Unauthorized API key: Please verify the spelling of the API key and its current validity."), + (465, "Subscription-related error: Please ensure that your subscription is active and has not expired."), + (475, "Billing-related error: Please ensure you have enough credits to run this pipeline. 
"), + ( + 485, + "Supplier-related error: Please ensure that the selected supplier provides the pipeline you are trying to access.", + ), (495, "Validation-related error: Please ensure all required fields are provided and correctly formatted."), (501, "Status 501: Unspecified error: An unspecified error occurred while processing your request."), - ], ) - def test_run_async_errors(status_code, error_message): base_url = config.BACKEND_URL pipeline_id = "pipeline_id" execute_url = f"{base_url}/assets/pipeline/execution/run/{pipeline_id}" - + with requests_mock.Mocker() as mock: mock.post(execute_url, status_code=status_code) test_pipeline = Pipeline(id=pipeline_id, api_key=config.TEAM_API_KEY, name="Test Pipeline", url=base_url) response = test_pipeline.run_async(data="input_data") assert response["status"] == "FAILED" - assert response["error_message"] == error_message \ No newline at end of file + assert response["error_message"] == error_message + + +def test_list_pipelines_error_response(): + with requests_mock.Mocker() as mock: + query = "test-query" + page_number = 0 + page_size = 20 + url = urljoin(config.BACKEND_URL, "sdk/pipelines/paginate") + headers = {"x-aixplain-key": config.AIXPLAIN_API_KEY, "Content-Type": "application/json"} + + error_response = {"statusCode": 400, "message": "Bad Request"} + mock.post(url, headers=headers, json=error_response, status_code=400) + + with pytest.raises(Exception) as excinfo: + PipelineFactory.list(query=query, page_number=page_number, page_size=page_size) + + assert "Pipeline List Error: Failed to retrieve pipelines. Status Code: 400" in str(excinfo.value) + + +def test_get_pipeline_error_response(): + with requests_mock.Mocker() as mock: + pipeline_id = "test-pipeline-id" + url = urljoin(config.BACKEND_URL, f"sdk/pipelines/{pipeline_id}") + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + + error_response = {"statusCode": 404, "message": "Pipeline not found"} + mock.get(url, headers=headers, json=error_response, status_code=404) + + with pytest.raises(Exception) as excinfo: + PipelineFactory.get(pipeline_id=pipeline_id) + + assert "Pipeline GET Error: Failed to retrieve pipeline test-pipeline-id. Status Code: 404" in str(excinfo.value) From ecba34f4e9322acae70dc3e0e631b6b2ef366111 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Thu, 17 Oct 2024 11:10:47 -0300 Subject: [PATCH 049/105] Max tokens and iterations in agents/teams (#276) --- aixplain/modules/agent/__init__.py | 16 ++++++++++++++++ aixplain/modules/team_agent/__init__.py | 17 ++++++++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/aixplain/modules/agent/__init__.py b/aixplain/modules/agent/__init__.py index 546ea4d8..a7586c8b 100644 --- a/aixplain/modules/agent/__init__.py +++ b/aixplain/modules/agent/__init__.py @@ -105,6 +105,8 @@ def run( parameters: Dict = {}, wait_time: float = 0.5, content: Optional[Union[Dict[Text, Text], List[Text]]] = None, + max_tokens: int = 2048, + max_iterations: int = 10, ) -> Dict: """Runs an agent call. @@ -118,6 +120,8 @@ def run( parameters (Dict, optional): optional parameters to the model. Defaults to "{}". wait_time (float, optional): wait time in seconds between polling calls. Defaults to 0.5. content (Union[Dict[Text, Text], List[Text]], optional): Content inputs to be processed according to the query. Defaults to None. + max_tokens (int, optional): maximum number of tokens which can be generated by the agent. 
Defaults to 2048. + max_iterations (int, optional): maximum number of iterations between the agent and the tools. Defaults to 10. Returns: Dict: parsed output from model @@ -132,6 +136,8 @@ def run( name=name, parameters=parameters, content=content, + max_tokens=max_tokens, + max_iterations=max_iterations, ) if response["status"] == "FAILED": end = time.time() @@ -156,6 +162,8 @@ def run_async( name: Text = "model_process", parameters: Dict = {}, content: Optional[Union[Dict[Text, Text], List[Text]]] = None, + max_tokens: int = 2048, + max_iterations: int = 10, ) -> Dict: """Runs asynchronously an agent call. @@ -167,6 +175,8 @@ def run_async( name (Text, optional): ID given to a call. Defaults to "model_process". parameters (Dict, optional): optional parameters to the model. Defaults to "{}". content (Union[Dict[Text, Text], List[Text]], optional): Content inputs to be processed according to the query. Defaults to None. + max_tokens (int, optional): maximum number of tokens which can be generated by the agent. Defaults to 2048. + max_iterations (int, optional): maximum number of iterations between the agent and the tools. Defaults to 10. Returns: dict: polling URL in response @@ -205,6 +215,12 @@ def run_async( headers = {"x-api-key": self.api_key, "Content-Type": "application/json"} payload = {"id": self.id, "query": FileFactory.to_link(query), "sessionId": session_id, "history": history} + parameters.update( + { + "max_tokens": parameters["max_tokens"] if "max_tokens" in parameters else max_tokens, + "max_iterations": parameters["max_iterations"] if "max_iterations" in parameters else max_iterations, + } + ) payload.update(parameters) payload = json.dumps(payload) diff --git a/aixplain/modules/team_agent/__init__.py b/aixplain/modules/team_agent/__init__.py index 420fc23a..86321489 100644 --- a/aixplain/modules/team_agent/__init__.py +++ b/aixplain/modules/team_agent/__init__.py @@ -108,6 +108,8 @@ def run( parameters: Dict = {}, wait_time: float = 0.5, content: Optional[Union[Dict[Text, Text], List[Text]]] = None, + max_tokens: int = 2048, + max_iterations: int = 30, ) -> Dict: """Runs a team agent call. @@ -121,7 +123,8 @@ def run( parameters (Dict, optional): optional parameters to the model. Defaults to "{}". wait_time (float, optional): wait time in seconds between polling calls. Defaults to 0.5. content (Union[Dict[Text, Text], List[Text]], optional): Content inputs to be processed according to the query. Defaults to None. - + max_tokens (int, optional): maximum number of tokens which can be generated by the agents. Defaults to 2048. + max_iterations (int, optional): maximum number of iterations between the agents. Defaults to 30. Returns: Dict: parsed output from model """ @@ -135,6 +138,8 @@ def run( name=name, parameters=parameters, content=content, + max_tokens=max_tokens, + max_iterations=max_iterations, ) if response["status"] == "FAILED": end = time.time() @@ -159,6 +164,8 @@ def run_async( name: Text = "model_process", parameters: Dict = {}, content: Optional[Union[Dict[Text, Text], List[Text]]] = None, + max_tokens: int = 2048, + max_iterations: int = 30, ) -> Dict: """Runs asynchronously a Team Agent call. @@ -170,6 +177,8 @@ def run_async( name (Text, optional): ID given to a call. Defaults to "model_process". parameters (Dict, optional): optional parameters to the model. Defaults to "{}". content (Union[Dict[Text, Text], List[Text]], optional): Content inputs to be processed according to the query. Defaults to None. 
+ max_tokens (int, optional): maximum number of tokens which can be generated by the agents. Defaults to 2048. + max_iterations (int, optional): maximum number of iterations between the agents. Defaults to 30. Returns: dict: polling URL in response @@ -208,6 +217,12 @@ def run_async( headers = {"x-api-key": self.api_key, "Content-Type": "application/json"} payload = {"id": self.id, "query": FileFactory.to_link(query), "sessionId": session_id, "history": history} + parameters.update( + { + "max_tokens": parameters["max_tokens"] if "max_tokens" in parameters else max_tokens, + "max_iterations": parameters["max_iterations"] if "max_iterations" in parameters else max_iterations, + } + ) payload.update(parameters) payload = json.dumps(payload) From 2127cc59e2ee37786d081db43ceff7cba2de5614 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Thu, 17 Oct 2024 11:11:01 -0300 Subject: [PATCH 050/105] Update model running endpoints from v1 to v2 (#275) * Update model running endpoints from v1 to v2 * Support v1/v2 model serving endpoints * Adding the details in case of validation error * Adding one more test for asynchronous method in new endpoint * Setting error details and treating when data is null * Adding missing import in unit tests for models --- aixplain/modules/model/__init__.py | 89 +++---------- aixplain/modules/model/llm_model.py | 119 ++++++------------ aixplain/modules/model/utils.py | 75 +++++++++++ aixplain/utils/config.py | 4 +- .../data/asset_run_test_data.json | 4 + tests/functional/model/run_model_test.py | 12 ++ tests/unit/llm_test.py | 45 ++++--- tests/unit/model_test.py | 84 +++++++++++-- 8 files changed, 253 insertions(+), 179 deletions(-) create mode 100644 aixplain/modules/model/utils.py diff --git a/aixplain/modules/model/__init__.py b/aixplain/modules/model/__init__.py index 2e9445b5..621eb522 100644 --- a/aixplain/modules/model/__init__.py +++ b/aixplain/modules/model/__init__.py @@ -21,11 +21,11 @@ Model Class """ import time -import json import logging import traceback from aixplain.enums import Supplier, Function from aixplain.modules.asset import Asset +from aixplain.modules.model.utils import build_payload, call_run_endpoint from aixplain.utils import config from urllib.parse import urljoin from aixplain.utils.file_utils import _request_with_retry @@ -149,7 +149,7 @@ def sync_poll(self, poll_url: Text, name: Text = "model_process", wait_time: flo logging.error(f"Polling for Model: polling for {name}: {e}") break if response_body["completed"] is True: - logging.info(f"Polling for Model: Final status of polling for {name}: {response_body}") + logging.debug(f"Polling for Model: Final status of polling for {name}: {response_body}") else: response_body["status"] = "FAILED" logging.error( @@ -204,21 +204,21 @@ def run( Dict: parsed output from model """ start = time.time() - try: - response = self.run_async(data, name=name, parameters=parameters) - if response["status"] == "FAILED": + payload = build_payload(data=data, parameters=parameters) + url = f"{self.url}/api/v2/execute/{self.id}" + logging.debug(f"Model Run Sync: Start service for {name} - {url}") + response = call_run_endpoint(payload=payload, url=url, api_key=self.api_key) + if response["status"] == "IN_PROGRESS": + try: + poll_url = response["url"] end = time.time() - response["elapsed_time"] = end - start - return response - poll_url = response["url"] - end = time.time() - response = self.sync_poll(poll_url, name=name, timeout=timeout, 
wait_time=wait_time) - return response - except Exception as e: - msg = f"Error in request for {name} - {traceback.format_exc()}" - logging.error(f"Model Run: Error in running for {name}: {e}") - end = time.time() - return {"status": "FAILED", "error": msg, "elapsed_time": end - start} + response = self.sync_poll(poll_url, name=name, timeout=timeout, wait_time=wait_time) + except Exception as e: + msg = f"Error in request for {name} - {traceback.format_exc()}" + logging.error(f"Model Run: Error in running for {name}: {e}") + end = time.time() + response = {"status": "FAILED", "error": msg, "elapsed_time": end - start} + return response def run_async(self, data: Union[Text, Dict], name: Text = "model_process", parameters: Dict = {}) -> Dict: """Runs asynchronously a model call. @@ -231,59 +231,10 @@ def run_async(self, data: Union[Text, Dict], name: Text = "model_process", param Returns: dict: polling URL in response """ - headers = {"x-api-key": self.api_key, "Content-Type": "application/json"} - from aixplain.factories.file_factory import FileFactory - - data = FileFactory.to_link(data) - if isinstance(data, dict): - payload = data - else: - try: - payload = json.loads(data) - if isinstance(payload, dict) is False: - if isinstance(payload, int) is True or isinstance(payload, float) is True: - payload = str(payload) - payload = {"data": payload} - except Exception: - payload = {"data": data} - payload.update(parameters) - payload = json.dumps(payload) - - call_url = f"{self.url}/{self.id}" - r = _request_with_retry("post", call_url, headers=headers, data=payload) - logging.info(f"Model Run Async: Start service for {name} - {self.url} - {payload} - {headers}") - - resp = None - try: - if 200 <= r.status_code < 300: - resp = r.json() - logging.info(f"Result of request for {name} - {r.status_code} - {resp}") - poll_url = resp["data"] - response = {"status": "IN_PROGRESS", "url": poll_url} - else: - if r.status_code == 401: - error = "Unauthorized API key: Please verify the spelling of the API key and its current validity." - elif 460 <= r.status_code < 470: - error = "Subscription-related error: Please ensure that your subscription is active and has not expired." - elif 470 <= r.status_code < 480: - error = "Billing-related error: Please ensure you have enough credits to run this model. " - elif 480 <= r.status_code < 490: - error = "Supplier-related error: Please ensure that the selected supplier provides the model you are trying to access." - elif 490 <= r.status_code < 500: - error = "Validation-related error: Please ensure all required fields are provided and correctly formatted." - else: - status_code = str(r.status_code) - error = ( - f"Status {status_code}: Unspecified error: An unspecified error occurred while processing your request." 
- ) - response = {"status": "FAILED", "error_message": error} - logging.error(f"Error in request for {name} - {r.status_code}: {error}") - except Exception: - response = {"status": "FAILED"} - msg = f"Error in request for {name} - {traceback.format_exc()}" - logging.error(f"Model Run Async: Error in running for {name}: {resp}") - if resp is not None: - response["error"] = msg + url = f"{self.url}/api/v1/execute/{self.id}" + logging.debug(f"Model Run Async: Start service for {name} - {url}") + payload = build_payload(data=data, parameters=parameters) + response = call_run_endpoint(payload=payload, url=url, api_key=self.api_key) return response def check_finetune_status(self, after_epoch: Optional[int] = None): diff --git a/aixplain/modules/model/llm_model.py b/aixplain/modules/model/llm_model.py index c595d207..84db6704 100644 --- a/aixplain/modules/model/llm_model.py +++ b/aixplain/modules/model/llm_model.py @@ -21,13 +21,12 @@ Large Language Model Class """ import time -import json import logging import traceback from aixplain.enums import Function, Supplier from aixplain.modules.model import Model +from aixplain.modules.model.utils import build_payload, call_run_endpoint from aixplain.utils import config -from aixplain.utils.file_utils import _request_with_retry from typing import Union, Optional, List, Text, Dict @@ -125,31 +124,31 @@ def run( Dict: parsed output from model """ start = time.time() - try: - response = self.run_async( - data, - name=name, - temperature=temperature, - max_tokens=max_tokens, - top_p=top_p, - context=context, - prompt=prompt, - history=history, - parameters=parameters, - ) - if response["status"] == "FAILED": + parameters.update( + { + "context": parameters["context"] if "context" in parameters else context, + "prompt": parameters["prompt"] if "prompt" in parameters else prompt, + "history": parameters["history"] if "history" in parameters else history, + "temperature": parameters["temperature"] if "temperature" in parameters else temperature, + "max_tokens": parameters["max_tokens"] if "max_tokens" in parameters else max_tokens, + "top_p": parameters["top_p"] if "top_p" in parameters else top_p, + } + ) + payload = build_payload(data=data, parameters=parameters) + url = f"{self.url}/api/v2/execute/{self.id}" + logging.debug(f"Model Run Sync: Start service for {name} - {url}") + response = call_run_endpoint(payload=payload, url=url, api_key=self.api_key) + if response["status"] == "IN_PROGRESS": + try: + poll_url = response["url"] end = time.time() - response["elapsed_time"] = end - start - return response - poll_url = response["url"] - end = time.time() - response = self.sync_poll(poll_url, name=name, timeout=timeout, wait_time=wait_time) - return response - except Exception as e: - msg = f"Error in request for {name} - {traceback.format_exc()}" - logging.error(f"LLM Run: Error in running for {name}: {e}") - end = time.time() - return {"status": "FAILED", "error": msg, "elapsed_time": end - start} + response = self.sync_poll(poll_url, name=name, timeout=timeout, wait_time=wait_time) + except Exception as e: + msg = f"Error in request for {name} - {traceback.format_exc()}" + logging.error(f"Model Run: Error in running for {name}: {e}") + end = time.time() + response = {"status": "FAILED", "error": msg, "elapsed_time": end - start} + return response def run_async( self, @@ -179,66 +178,18 @@ def run_async( Returns: dict: polling URL in response """ - headers = {"x-api-key": self.api_key, "Content-Type": "application/json"} - - from 
aixplain.factories.file_factory import FileFactory - - data = FileFactory.to_link(data) - if isinstance(data, dict): - payload = data - else: - try: - payload = json.loads(data) - if isinstance(payload, dict) is False: - if isinstance(payload, int) is True or isinstance(payload, float) is True: - payload = str(payload) - payload = {"data": payload} - except Exception: - payload = {"data": data} + url = f"{self.url}/api/v1/execute/{self.id}" + logging.debug(f"Model Run Async: Start service for {name} - {url}") parameters.update( { - "context": payload["context"] if "context" in payload else context, - "prompt": payload["prompt"] if "prompt" in payload else prompt, - "history": payload["history"] if "history" in payload else history, - "temperature": payload["temperature"] if "temperature" in payload else temperature, - "max_tokens": payload["max_tokens"] if "max_tokens" in payload else max_tokens, - "top_p": payload["top_p"] if "top_p" in payload else top_p, + "context": parameters["context"] if "context" in parameters else context, + "prompt": parameters["prompt"] if "prompt" in parameters else prompt, + "history": parameters["history"] if "history" in parameters else history, + "temperature": parameters["temperature"] if "temperature" in parameters else temperature, + "max_tokens": parameters["max_tokens"] if "max_tokens" in parameters else max_tokens, + "top_p": parameters["top_p"] if "top_p" in parameters else top_p, } ) - payload.update(parameters) - payload = json.dumps(payload) - - call_url = f"{self.url}/{self.id}" - r = _request_with_retry("post", call_url, headers=headers, data=payload) - logging.info(f"Model Run Async: Start service for {name} - {self.url} - {payload} - {headers}") - - resp = None - try: - if 200 <= r.status_code < 300: - resp = r.json() - logging.info(f"Result of request for {name} - {r.status_code} - {resp}") - poll_url = resp["data"] - response = {"status": "IN_PROGRESS", "url": poll_url} - else: - if r.status_code == 401: - error = "Unauthorized API key: Please verify the spelling of the API key and its current validity." - elif 460 <= r.status_code < 470: - error = "Subscription-related error: Please ensure that your subscription is active and has not expired." - elif 470 <= r.status_code < 480: - error = "Billing-related error: Please ensure you have enough credits to run this model. " - elif 480 <= r.status_code < 490: - error = "Supplier-related error: Please ensure that the selected supplier provides the model you are trying to access." - elif 490 <= r.status_code < 500: - error = "Validation-related error: Please ensure all required fields are provided and correctly formatted." - else: - status_code = str(r.status_code) - error = f"Status {status_code}: Unspecified error: An unspecified error occurred while processing your request." 
- response = {"status": "FAILED", "error_message": error} - logging.error(f"Error in request for {name} - {r.status_code}: {error}") - except Exception: - response = {"status": "FAILED"} - msg = f"Error in request for {name} - {traceback.format_exc()}" - logging.error(f"Model Run Async: Error in running for {name}: {resp}") - if resp is not None: - response["error"] = msg + payload = build_payload(data=data, parameters=parameters) + response = call_run_endpoint(payload=payload, url=url, api_key=self.api_key) return response diff --git a/aixplain/modules/model/utils.py b/aixplain/modules/model/utils.py new file mode 100644 index 00000000..a78455b7 --- /dev/null +++ b/aixplain/modules/model/utils.py @@ -0,0 +1,75 @@ +__author__ = "thiagocastroferreira" + +import json +import logging +from aixplain.utils.file_utils import _request_with_retry +from typing import Dict, Text, Union + + +def build_payload(data: Union[Text, Dict], parameters: Dict = {}): + from aixplain.factories import FileFactory + + data = FileFactory.to_link(data) + if isinstance(data, dict): + payload = data + else: + try: + payload = json.loads(data) + if isinstance(payload, dict) is False: + if isinstance(payload, int) is True or isinstance(payload, float) is True: + payload = str(payload) + payload = {"data": payload} + except Exception: + payload = {"data": data} + payload.update(parameters) + payload = json.dumps(payload) + return payload + + +def call_run_endpoint(url: Text, api_key: Text, payload: Dict) -> Dict: + headers = {"x-api-key": api_key, "Content-Type": "application/json"} + + resp = "unspecified error" + try: + r = _request_with_retry("post", url, headers=headers, data=payload) + resp = r.json() + except Exception as e: + logging.error(f"Error in request: {e}") + response = { + "status": "FAILED", + "completed": True, + "error_message": "Model Run: An error occurred while processing your request.", + } + + if 200 <= r.status_code < 300: + logging.info(f"Result of request: {r.status_code} - {resp}") + status = resp.get("status", "IN_PROGRESS") + data = resp.get("data", None) + if status == "IN_PROGRESS": + if data is not None: + response = {"status": status, "url": data, "completed": True} + else: + response = { + "status": "FAILED", + "completed": True, + "error_message": "Model Run: An error occurred while processing your request.", + } + else: + response = {"status": status, "data": data, "completed": True} + else: + if r.status_code == 401: + error = f"Unauthorized API key: Please verify the spelling of the API key and its current validity. Details: {resp}" + elif 460 <= r.status_code < 470: + error = f"Subscription-related error: Please ensure that your subscription is active and has not expired. Details: {resp}" + elif 470 <= r.status_code < 480: + error = f"Billing-related error: Please ensure you have enough credits to run this model. Details: {resp}" + elif 480 <= r.status_code < 490: + error = f"Supplier-related error: Please ensure that the selected supplier provides the model you are trying to access. Details: {resp}" + elif 490 <= r.status_code < 500: + error = f"Validation-related error: Please ensure all required fields are provided and correctly formatted. 
Details: {resp}" + else: + status_code = str(r.status_code) + error = f"Status {status_code} - Unspecified error: {resp}" + response = {"status": "FAILED", "error_message": error, "completed": True} + logging.error(f"Error in request: {r.status_code}: {error}") + return response diff --git a/aixplain/utils/config.py b/aixplain/utils/config.py index 3bb0eb09..59805c60 100644 --- a/aixplain/utils/config.py +++ b/aixplain/utils/config.py @@ -19,11 +19,11 @@ logger = logging.getLogger(__name__) BACKEND_URL = os.getenv("BACKEND_URL", "https://platform-api.aixplain.com") -MODELS_RUN_URL = os.getenv("MODELS_RUN_URL", "https://models.aixplain.com/api/v1/execute") +MODELS_RUN_URL = os.getenv("MODELS_RUN_URL", "https://models.aixplain.com") # GET THE API KEY FROM CMD TEAM_API_KEY = os.getenv("TEAM_API_KEY", "") AIXPLAIN_API_KEY = os.getenv("AIXPLAIN_API_KEY", "") PIPELINE_API_KEY = os.getenv("PIPELINE_API_KEY", "") MODEL_API_KEY = os.getenv("MODEL_API_KEY", "") LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO") -HF_TOKEN = os.getenv("HF_TOKEN", "") \ No newline at end of file +HF_TOKEN = os.getenv("HF_TOKEN", "") diff --git a/tests/functional/general_assets/data/asset_run_test_data.json b/tests/functional/general_assets/data/asset_run_test_data.json index abe7a3e9..e24df1ef 100644 --- a/tests/functional/general_assets/data/asset_run_test_data.json +++ b/tests/functional/general_assets/data/asset_run_test_data.json @@ -3,6 +3,10 @@ "id" : "61b097551efecf30109d32da", "data": "This is a test sentence." }, + "model2" : { + "id" : "60ddefab8d38c51c5885ee38", + "data": "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/myname.mp3" + }, "pipeline": { "name": "SingleNodePipeline", "data": "This is a test sentence." diff --git a/tests/functional/model/run_model_test.py b/tests/functional/model/run_model_test.py index 47f351bb..d5c1d6ac 100644 --- a/tests/functional/model/run_model_test.py +++ b/tests/functional/model/run_model_test.py @@ -31,3 +31,15 @@ def test_llm_run(llm_model): ) assert response["status"] == "SUCCESS" assert "thiago" in response["data"].lower() + + +def test_run_async(): + """Testing Model Async""" + model = ModelFactory.get("60ddef828d38c51c5885d491") + + response = model.run_async("Test") + poll_url = response["url"] + response = model.sync_poll(poll_url) + + assert response["status"] == "SUCCESS" + assert "teste" in response["data"].lower() diff --git a/tests/unit/llm_test.py b/tests/unit/llm_test.py index 430fc338..f76f71b2 100644 --- a/tests/unit/llm_test.py +++ b/tests/unit/llm_test.py @@ -1,6 +1,4 @@ - from dotenv import load_dotenv -from urllib.parse import urljoin import requests_mock from aixplain.enums import Function @@ -10,27 +8,44 @@ import pytest + @pytest.mark.parametrize( "status_code,error_message", [ - (401,"Unauthorized API key: Please verify the spelling of the API key and its current validity."), - (465,"Subscription-related error: Please ensure that your subscription is active and has not expired."), - (475,"Billing-related error: Please ensure you have enough credits to run this model. "), - (485, "Supplier-related error: Please ensure that the selected supplier provides the model you are trying to access."), - (495, "Validation-related error: Please ensure all required fields are provided and correctly formatted."), - (501, "Status 501: Unspecified error: An unspecified error occurred while processing your request."), - + ( + 401, + "Unauthorized API key: Please verify the spelling of the API key and its current validity. 
Details: {'error': 'An unspecified error occurred while processing your request.'}", + ), + ( + 465, + "Subscription-related error: Please ensure that your subscription is active and has not expired. Details: {'error': 'An unspecified error occurred while processing your request.'}", + ), + ( + 475, + "Billing-related error: Please ensure you have enough credits to run this model. Details: {'error': 'An unspecified error occurred while processing your request.'}", + ), + ( + 485, + "Supplier-related error: Please ensure that the selected supplier provides the model you are trying to access. Details: {'error': 'An unspecified error occurred while processing your request.'}", + ), + ( + 495, + "Validation-related error: Please ensure all required fields are provided and correctly formatted. Details: {'error': 'An unspecified error occurred while processing your request.'}", + ), + (501, "Status 501 - Unspecified error: {'error': 'An unspecified error occurred while processing your request.'}"), ], ) - def test_run_async_errors(status_code, error_message): base_url = config.MODELS_RUN_URL llm_id = "llm-id" - execute_url = urljoin(base_url, f"execute/{llm_id}") - + execute_url = f"{base_url}/api/v1/execute/{llm_id}" + ref_response = { + "error": "An unspecified error occurred while processing your request.", + } + with requests_mock.Mocker() as mock: - mock.post(execute_url, status_code=status_code) - test_llm = LLM(id=llm_id, name="Test llm",url=base_url, function=Function.TEXT_GENERATION) + mock.post(execute_url, status_code=status_code, json=ref_response) + test_llm = LLM(id=llm_id, name="Test llm", url=base_url, function=Function.TEXT_GENERATION) response = test_llm.run_async(data="input_data") assert response["status"] == "FAILED" - assert response["error_message"] == error_message \ No newline at end of file + assert response["error_message"] == error_message diff --git a/tests/unit/model_test.py b/tests/unit/model_test.py index a319742c..d491c1fd 100644 --- a/tests/unit/model_test.py +++ b/tests/unit/model_test.py @@ -17,19 +17,67 @@ """ from dotenv import load_dotenv -from urllib.parse import urljoin import requests_mock load_dotenv() import re +import json from aixplain.utils import config from aixplain.modules import Model +from aixplain.modules.model.utils import build_payload, call_run_endpoint from aixplain.factories import ModelFactory from aixplain.enums import Function +from urllib.parse import urljoin import pytest +def test_build_payload(): + data = "input_data" + parameters = {"context": "context_data"} + ref_payload = json.dumps({"data": data, **parameters}) + hyp_payload = build_payload(data, parameters) + assert hyp_payload == ref_payload + + +def test_call_run_endpoint_async(): + base_url = config.MODELS_RUN_URL + model_id = "model-id" + execute_url = f"{base_url}/api/v1/execute/{model_id}" + payload = {"data": "input_data"} + ref_response = { + "completed": True, + "status": "IN_PROGRESS", + "data": "https://models.aixplain.com/api/v1/data/a90c2078-edfe-403f-acba-d2d94cf71f42", + } + + with requests_mock.Mocker() as mock: + mock.post(execute_url, json=ref_response) + response = call_run_endpoint(url=execute_url, api_key=config.TEAM_API_KEY, payload=payload) + + print(response) + assert response["completed"] == ref_response["completed"] + assert response["status"] == ref_response["status"] + assert response["url"] == ref_response["data"] + + +def test_call_run_endpoint_sync(): + base_url = config.MODELS_RUN_URL + model_id = "model-id" + execute_url = 
f"{base_url}/api/v1/execute/{model_id}" + payload = {"data": "input_data"} + ref_response = {"completed": True, "status": "SUCCESS", "data": "Hello"} + + with requests_mock.Mocker() as mock: + mock.post(execute_url, json=ref_response) + response = call_run_endpoint(url=execute_url, api_key=config.TEAM_API_KEY, payload=payload) + + print(response) + assert response["completed"] == ref_response["completed"] + assert response["status"] == ref_response["status"] + assert response["data"] == ref_response["data"] + + def test_success_poll(): with requests_mock.Mocker() as mock: poll_url = "https://models.aixplain.com/api/v1/data/a90c2078-edfe-403f-acba-d2d94cf71f42" @@ -65,21 +113,39 @@ def test_failed_poll(): @pytest.mark.parametrize( "status_code,error_message", [ - (401, "Unauthorized API key: Please verify the spelling of the API key and its current validity."), - (465, "Subscription-related error: Please ensure that your subscription is active and has not expired."), - (475, "Billing-related error: Please ensure you have enough credits to run this model. "), - (485, "Supplier-related error: Please ensure that the selected supplier provides the model you are trying to access."), - (495, "Validation-related error: Please ensure all required fields are provided and correctly formatted."), - (501, "Status 501: Unspecified error: An unspecified error occurred while processing your request."), + ( + 401, + "Unauthorized API key: Please verify the spelling of the API key and its current validity. Details: {'error': 'An unspecified error occurred while processing your request.'}", + ), + ( + 465, + "Subscription-related error: Please ensure that your subscription is active and has not expired. Details: {'error': 'An unspecified error occurred while processing your request.'}", + ), + ( + 475, + "Billing-related error: Please ensure you have enough credits to run this model. Details: {'error': 'An unspecified error occurred while processing your request.'}", + ), + ( + 485, + "Supplier-related error: Please ensure that the selected supplier provides the model you are trying to access. Details: {'error': 'An unspecified error occurred while processing your request.'}", + ), + ( + 495, + "Validation-related error: Please ensure all required fields are provided and correctly formatted. 
Details: {'error': 'An unspecified error occurred while processing your request.'}", + ), + (501, "Status 501 - Unspecified error: {'error': 'An unspecified error occurred while processing your request.'}"), ], ) def test_run_async_errors(status_code, error_message): base_url = config.MODELS_RUN_URL model_id = "model-id" - execute_url = urljoin(base_url, f"execute/{model_id}") + execute_url = f"{base_url}/api/v1/execute/{model_id}" + ref_response = { + "error": "An unspecified error occurred while processing your request.", + } with requests_mock.Mocker() as mock: - mock.post(execute_url, status_code=status_code) + mock.post(execute_url, status_code=status_code, json=ref_response) test_model = Model(id=model_id, name="Test Model", url=base_url) response = test_model.run_async(data="input_data") assert response["status"] == "FAILED" From 736a7b10f24e0b5af83875f468820e3f618dcf0a Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Thu, 17 Oct 2024 18:46:55 -0300 Subject: [PATCH 051/105] Eng 711 new model endpoints (#278) * Update model running endpoints from v1 to v2 * Support v1/v2 model serving endpoints * Adding the details in case of validation error * Adding one more test for asynchronous method in new endpoint * Setting error details and treating when data is null * Adding missing import in unit tests for models * Having endpoint v1 by default and setting v2 on the fly --- aixplain/modules/model/__init__.py | 4 ++-- aixplain/modules/model/llm_model.py | 4 ++-- tests/unit/llm_test.py | 2 +- tests/unit/model_test.py | 7 +++---- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/aixplain/modules/model/__init__.py b/aixplain/modules/model/__init__.py index 621eb522..441027d4 100644 --- a/aixplain/modules/model/__init__.py +++ b/aixplain/modules/model/__init__.py @@ -205,7 +205,7 @@ def run( """ start = time.time() payload = build_payload(data=data, parameters=parameters) - url = f"{self.url}/api/v2/execute/{self.id}" + url = f"{self.url}/{self.id}".replace("api/v1/execute", "api/v2/execute") logging.debug(f"Model Run Sync: Start service for {name} - {url}") response = call_run_endpoint(payload=payload, url=url, api_key=self.api_key) if response["status"] == "IN_PROGRESS": @@ -231,7 +231,7 @@ def run_async(self, data: Union[Text, Dict], name: Text = "model_process", param Returns: dict: polling URL in response """ - url = f"{self.url}/api/v1/execute/{self.id}" + url = f"{self.url}/{self.id}" logging.debug(f"Model Run Async: Start service for {name} - {url}") payload = build_payload(data=data, parameters=parameters) response = call_run_endpoint(payload=payload, url=url, api_key=self.api_key) diff --git a/aixplain/modules/model/llm_model.py b/aixplain/modules/model/llm_model.py index 84db6704..876b24dc 100644 --- a/aixplain/modules/model/llm_model.py +++ b/aixplain/modules/model/llm_model.py @@ -135,7 +135,7 @@ def run( } ) payload = build_payload(data=data, parameters=parameters) - url = f"{self.url}/api/v2/execute/{self.id}" + url = f"{self.url}/{self.id}".replace("/api/v1/execute", "/api/v2/execute") logging.debug(f"Model Run Sync: Start service for {name} - {url}") response = call_run_endpoint(payload=payload, url=url, api_key=self.api_key) if response["status"] == "IN_PROGRESS": @@ -178,7 +178,7 @@ def run_async( Returns: dict: polling URL in response """ - url = f"{self.url}/api/v1/execute/{self.id}" + url = f"{self.url}/{self.id}" logging.debug(f"Model Run Async: Start service for {name} - {url}") parameters.update( 
{
diff --git a/tests/unit/llm_test.py b/tests/unit/llm_test.py
index f76f71b2..b0dbe19a 100644
--- a/tests/unit/llm_test.py
+++ b/tests/unit/llm_test.py
@@ -38,7 +38,7 @@ def test_run_async_errors(status_code, error_message):
     base_url = config.MODELS_RUN_URL
     llm_id = "llm-id"
-    execute_url = f"{base_url}/api/v1/execute/{llm_id}"
+    execute_url = f"{base_url}/{llm_id}"
     ref_response = {
         "error": "An unspecified error occurred while processing your request.",
     }
diff --git a/tests/unit/model_test.py b/tests/unit/model_test.py
index d491c1fd..0907b8f1 100644
--- a/tests/unit/model_test.py
+++ b/tests/unit/model_test.py
@@ -43,7 +43,7 @@ def test_build_payload():
 def test_call_run_endpoint_async():
     base_url = config.MODELS_RUN_URL
     model_id = "model-id"
-    execute_url = f"{base_url}/api/v1/execute/{model_id}"
+    execute_url = f"{base_url}/{model_id}"
     payload = {"data": "input_data"}
     ref_response = {
         "completed": True,
@@ -64,7 +64,7 @@ def test_call_run_endpoint_async():
 def test_call_run_endpoint_sync():
     base_url = config.MODELS_RUN_URL
     model_id = "model-id"
-    execute_url = f"{base_url}/api/v1/execute/{model_id}"
+    execute_url = f"{base_url}/{model_id}".replace("/api/v1/execute", "/api/v2/execute")
     payload = {"data": "input_data"}
     ref_response = {"completed": True, "status": "SUCCESS", "data": "Hello"}
@@ -72,7 +72,6 @@ def test_call_run_endpoint_sync():
         mock.post(execute_url, json=ref_response)
         response = call_run_endpoint(url=execute_url, api_key=config.TEAM_API_KEY, payload=payload)
 
-    print(response)
     assert response["completed"] == ref_response["completed"]
     assert response["status"] == ref_response["status"]
     assert response["data"] == ref_response["data"]
@@ -139,7 +138,7 @@ def test_failed_poll():
 def test_run_async_errors(status_code, error_message):
     base_url = config.MODELS_RUN_URL
     model_id = "model-id"
-    execute_url = f"{base_url}/api/v1/execute/{model_id}"
+    execute_url = f"{base_url}/{model_id}"
     ref_response = {
         "error": "An unspecified error occurred while processing your request.",
     }
From fa3353126e27b11c89126cccf78218556350af38 Mon Sep 17 00:00:00 2001
From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com>
Date: Fri, 25 Oct 2024 16:26:49 -0300
Subject: [PATCH 052/105] Group of Improvements in API Key CRUD (#277)

* assetLimits -> assetsLimits

* Update api key usage limit service

* Remove option for asset ID filtering in the URLs for getting usage

* Update version

* Full response on synchronous model execution

* Get API Key service

* Change the name to daily count limits

* Explicitly point to the error field when it exists

* Validate api key before updating it

* Create setters for token and request limits

---------

Co-authored-by: Thiago Castro Ferreira
---
 aixplain/factories/api_key_factory.py | 40 +++++++----
 aixplain/modules/__init__.py          |  2 +-
 aixplain/modules/api_key.py           | 97 ++++++++++++++++------
 aixplain/modules/model/utils.py       |  5 +-
 aixplain/utils/config.py              |  2 +-
 pyproject.toml                        |  2 +-
 tests/functional/apikey/test_api.py   | 27 +++++---
 tests/unit/api_key_test.py            | 67 ++++++++++++++--
 tests/unit/llm_test.py                | 12 ++--
 tests/unit/model_test.py              | 12 ++--
 10 files changed, 193 insertions(+), 73 deletions(-)

diff --git a/aixplain/factories/api_key_factory.py b/aixplain/factories/api_key_factory.py
index 4ac8f00a..c719c26b 100644
--- a/aixplain/factories/api_key_factory.py
+++ b/aixplain/factories/api_key_factory.py
@@ -4,12 +4,20 @@
 from datetime import datetime
 from typing import Text, List, Optional, Dict, Union
 from aixplain.utils.file_utils import _request_with_retry
-from aixplain.modules.api_key
import APIKey, APIKeyGlobalLimits, APIKeyUsageLimit +from aixplain.modules.api_key import APIKey, APIKeyLimits, APIKeyUsageLimit class APIKeyFactory: backend_url = config.BACKEND_URL + @classmethod + def get(cls, api_key: Text) -> APIKey: + """Get an API key""" + for api_key_obj in cls.list(): + if str(api_key_obj.access_key).startswith(api_key[:4]) and str(api_key_obj.access_key).endswith(api_key[-4:]): + return api_key_obj + raise Exception(f"API Key Error: API key {api_key} not found") + @classmethod def list(cls) -> List[APIKey]: """List all API keys""" @@ -30,7 +38,7 @@ def list(cls) -> List[APIKey]: name=key["name"], budget=key["budget"] if "budget" in key else None, global_limits=key["globalLimits"] if "globalLimits" in key else None, - asset_limits=key["assetLimits"] if "assetLimits" in key else [], + asset_limits=key["assetsLimits"] if "assetsLimits" in key else [], expires_at=key["expiresAt"] if "expiresAt" in key else None, access_key=key["accessKey"], is_admin=key["isAdmin"], @@ -46,8 +54,8 @@ def create( cls, name: Text, budget: int, - global_limits: Union[Dict, APIKeyGlobalLimits], - asset_limits: List[Union[Dict, APIKeyGlobalLimits]], + global_limits: Union[Dict, APIKeyLimits], + asset_limits: List[Union[Dict, APIKeyLimits]], expires_at: datetime, ) -> APIKey: """Create a new API key""" @@ -84,6 +92,7 @@ def create( @classmethod def update(cls, api_key: APIKey) -> APIKey: """Update an existing API key""" + api_key.validate() try: resp = "Unspecified error" url = f"{cls.backend_url}/sdk/api-keys/{api_key.id}" @@ -112,12 +121,10 @@ def update(cls, api_key: APIKey) -> APIKey: raise Exception(f"API Key Update Error: Failed to update API key with ID {api_key.id}. Error: {str(resp)}") @classmethod - def get_usage_limit(cls, api_key: Text = config.TEAM_API_KEY, asset_id: Optional[Text] = None) -> APIKeyUsageLimit: - """Get API key usage limit""" + def get_usage_limits(cls, api_key: Text = config.TEAM_API_KEY, asset_id: Optional[Text] = None) -> List[APIKeyUsageLimit]: + """Get API key usage limits""" try: url = f"{config.BACKEND_URL}/sdk/api-keys/usage-limits" - if asset_id is not None: - url += f"?assetId={asset_id}" headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"} logging.info(f"Start service for GET API Key Usage - {url} - {headers}") r = _request_with_retry("GET", url, headers=headers) @@ -128,11 +135,16 @@ def get_usage_limit(cls, api_key: Text = config.TEAM_API_KEY, asset_id: Optional raise Exception(f"{message}") if 200 <= r.status_code < 300: - return APIKeyUsageLimit( - request_count=resp["requestCount"], - request_count_limit=resp["requestCountLimit"], - token_count=resp["tokenCount"], - token_count_limit=resp["tokenCountLimit"], - ) + return [ + APIKeyUsageLimit( + daily_request_count=limit["requestCount"], + daily_request_limit=limit["requestCountLimit"], + daily_token_count=limit["tokenCount"], + daily_token_limit=limit["tokenCountLimit"], + model=limit["assetId"] if "assetId" in limit else None, + ) + for limit in resp + if asset_id is None or ("assetId" in limit and limit["assetId"] == asset_id) + ] else: raise Exception(f"API Key Usage Error: Failed to get usage. 
Error: {str(resp)}") diff --git a/aixplain/modules/__init__.py b/aixplain/modules/__init__.py index d49e29d4..4432e1ad 100644 --- a/aixplain/modules/__init__.py +++ b/aixplain/modules/__init__.py @@ -36,4 +36,4 @@ from .agent import Agent from .agent.tool import Tool from .team_agent import TeamAgent -from .api_key import APIKey, APIKeyGlobalLimits, APIKeyUsageLimit +from .api_key import APIKey, APIKeyLimits, APIKeyUsageLimit diff --git a/aixplain/modules/api_key.py b/aixplain/modules/api_key.py index 886b0dab..ae774c23 100644 --- a/aixplain/modules/api_key.py +++ b/aixplain/modules/api_key.py @@ -6,7 +6,7 @@ from typing import Dict, List, Optional, Text, Union -class APIKeyGlobalLimits: +class APIKeyLimits: def __init__( self, token_per_minute: int, @@ -27,19 +27,31 @@ def __init__( class APIKeyUsageLimit: - def __init__(self, request_count: int, request_count_limit: int, token_count: int, token_count_limit: int): - """Get the usage limits of an API key + def __init__( + self, + daily_request_count: int, + daily_request_limit: int, + daily_token_count: int, + daily_token_limit: int, + model: Optional[Union[Text, Model]] = None, + ): + """Get the usage limits of an API key globally (model equals to None) or for a specific model. Args: - request_count (int): number of requests made - request_count_limit (int): limit of requests - token_count (int): number of tokens used - token_count_limit (int): limit of tokens + daily_request_count (int): number of requests made + daily_request_limit (int): limit of requests + daily_token_count (int): number of tokens used + daily_token_limit (int): limit of tokens + model (Optional[Union[Text, Model]], optional): Model which the limits apply. Defaults to None. """ - self.request_count = request_count - self.request_count_limit = request_count_limit - self.token_count = token_count - self.token_count_limit = token_count_limit + self.daily_request_count = daily_request_count + self.daily_request_limit = daily_request_limit + self.daily_token_count = daily_token_count + self.daily_token_limit = daily_token_limit + if model is not None and isinstance(model, str): + from aixplain.factories import ModelFactory + + self.model = ModelFactory.get(model) class APIKey: @@ -48,8 +60,8 @@ def __init__( name: Text, expires_at: Optional[Union[datetime, Text]] = None, budget: Optional[float] = None, - asset_limits: List[APIKeyGlobalLimits] = [], - global_limits: Optional[Union[Dict, APIKeyGlobalLimits]] = None, + asset_limits: List[APIKeyLimits] = [], + global_limits: Optional[Union[Dict, APIKeyLimits]] = None, id: int = "", access_key: Optional[Text] = None, is_admin: bool = False, @@ -59,7 +71,7 @@ def __init__( self.budget = budget self.global_limits = global_limits if global_limits is not None and isinstance(global_limits, dict): - self.global_limits = APIKeyGlobalLimits( + self.global_limits = APIKeyLimits( token_per_minute=global_limits["tpm"], token_per_day=global_limits["tpd"], request_per_minute=global_limits["rpm"], @@ -68,7 +80,7 @@ def __init__( self.asset_limits = asset_limits for i, asset_limit in enumerate(self.asset_limits): if isinstance(asset_limit, dict): - self.asset_limits[i] = APIKeyGlobalLimits( + self.asset_limits[i] = APIKeyLimits( token_per_minute=asset_limit["tpm"], token_per_day=asset_limit["tpd"], request_per_minute=asset_limit["rpm"], @@ -110,7 +122,7 @@ def to_dict(self) -> Dict: "id": self.id, "name": self.name, "budget": self.budget, - "assetLimits": [], + "assetsLimits": [], "expiresAt": self.expires_at, } @@ -126,7 +138,7 @@ def 
to_dict(self) -> Dict: } for i, asset_limit in enumerate(self.asset_limits): - payload["assetLimits"].append( + payload["assetsLimits"].append( { "tpm": asset_limit.token_per_minute, "tpd": asset_limit.token_per_day, @@ -157,8 +169,6 @@ def get_usage(self, asset_id: Optional[Text] = None) -> APIKeyUsageLimit: url = f"{config.BACKEND_URL}/sdk/api-keys/{self.id}/usage-limits" headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} logging.info(f"Start service for GET API Key Usage - {url} - {headers}") - if asset_id is not None: - url += f"?assetId={asset_id}" r = _request_with_retry("GET", url, headers=headers) resp = r.json() except Exception: @@ -167,11 +177,48 @@ def get_usage(self, asset_id: Optional[Text] = None) -> APIKeyUsageLimit: raise Exception(f"{message}") if 200 <= r.status_code < 300: - return APIKeyUsageLimit( - request_count=resp["requestCount"], - request_count_limit=resp["requestCountLimit"], - token_count=resp["tokenCount"], - token_count_limit=resp["tokenCountLimit"], - ) + return [ + APIKeyUsageLimit( + daily_request_count=limit["requestCount"], + daily_request_limit=limit["requestCountLimit"], + daily_token_count=limit["tokenCount"], + daily_token_limit=limit["tokenCountLimit"], + model=limit["assetId"] if "assetId" in limit else None, + ) + for limit in resp + if asset_id is None or ("assetId" in limit and limit["assetId"] == asset_id) + ] else: raise Exception(f"API Key Usage Error: Failed to get usage. Error: {str(resp)}") + + def __set_limit(self, limit: int, model: Optional[Union[Text, Model]], limit_type: Text) -> None: + """Set a limit for an API key""" + if model is None: + setattr(self.global_limits, limit_type, limit) + else: + if isinstance(model, Model): + model = model.id + is_found = False + for i, asset_limit in enumerate(self.asset_limits): + if asset_limit.model.id == model: + setattr(self.asset_limits[i], limit_type, limit) + is_found = True + break + if is_found is False: + raise Exception(f"Limit for Model {model} not found in the API key.") + + def set_token_per_day(self, token_per_day: int, model: Optional[Union[Text, Model]] = None) -> None: + """Set the token per day limit of an API key""" + self.__set_limit(token_per_day, model, "token_per_day") + + def set_token_per_minute(self, token_per_minute: int, model: Optional[Union[Text, Model]] = None) -> None: + """Set the token per minute limit of an API key""" + self.__set_limit(token_per_minute, model, "token_per_minute") + + def set_request_per_day(self, request_per_day: int, model: Optional[Union[Text, Model]] = None) -> None: + """Set the request per day limit of an API key""" + self.__set_limit(request_per_day, model, "request_per_day") + + def set_request_per_minute(self, request_per_minute: int, model: Optional[Union[Text, Model]] = None) -> None: + """Set the request per minute limit of an API key""" + self.__set_limit(request_per_minute, model, "request_per_minute") diff --git a/aixplain/modules/model/utils.py b/aixplain/modules/model/utils.py index a78455b7..d29da68b 100644 --- a/aixplain/modules/model/utils.py +++ b/aixplain/modules/model/utils.py @@ -55,8 +55,9 @@ def call_run_endpoint(url: Text, api_key: Text, payload: Dict) -> Dict: "error_message": "Model Run: An error occurred while processing your request.", } else: - response = {"status": status, "data": data, "completed": True} + response = resp else: + resp = resp["error"] if "error" in resp else resp if r.status_code == 401: error = f"Unauthorized API key: Please verify the spelling of 
the API key and its current validity. Details: {resp}" elif 460 <= r.status_code < 470: @@ -66,7 +67,7 @@ def call_run_endpoint(url: Text, api_key: Text, payload: Dict) -> Dict: elif 480 <= r.status_code < 490: error = f"Supplier-related error: Please ensure that the selected supplier provides the model you are trying to access. Details: {resp}" elif 490 <= r.status_code < 500: - error = f"Validation-related error: Please ensure all required fields are provided and correctly formatted. Details: {resp}" + error = f"{resp}" else: status_code = str(r.status_code) error = f"Status {status_code} - Unspecified error: {resp}" diff --git a/aixplain/utils/config.py b/aixplain/utils/config.py index 59805c60..03bbdccf 100644 --- a/aixplain/utils/config.py +++ b/aixplain/utils/config.py @@ -19,7 +19,7 @@ logger = logging.getLogger(__name__) BACKEND_URL = os.getenv("BACKEND_URL", "https://platform-api.aixplain.com") -MODELS_RUN_URL = os.getenv("MODELS_RUN_URL", "https://models.aixplain.com") +MODELS_RUN_URL = os.getenv("MODELS_RUN_URL", "https://models.aixplain.com/api/v1/execute") # GET THE API KEY FROM CMD TEAM_API_KEY = os.getenv("TEAM_API_KEY", "") AIXPLAIN_API_KEY = os.getenv("AIXPLAIN_API_KEY", "") diff --git a/pyproject.toml b/pyproject.toml index e0df02a2..1f034299 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ namespaces = true [project] name = "aiXplain" -version = "0.2.21rc0" +version = "0.2.21rc1" description = "aiXplain SDK adds AI functions to software." readme = "README.md" requires-python = ">=3.5, <4" diff --git a/tests/functional/apikey/test_api.py b/tests/functional/apikey/test_api.py index 80b75189..2c228f6b 100644 --- a/tests/functional/apikey/test_api.py +++ b/tests/functional/apikey/test_api.py @@ -1,5 +1,5 @@ from aixplain.factories.api_key_factory import APIKeyFactory -from aixplain.modules import APIKey, APIKeyGlobalLimits, APIKeyUsageLimit +from aixplain.modules import APIKey, APIKeyLimits, APIKeyUsageLimit from datetime import datetime import json import pytest @@ -16,7 +16,7 @@ def test_create_api_key_from_json(): api_key = APIKeyFactory.create( name=api_key_data["name"], asset_limits=[ - APIKeyGlobalLimits( + APIKeyLimits( model=api_key_data["asset_limits"][0]["model"], token_per_minute=api_key_data["asset_limits"][0]["token_per_minute"], token_per_day=api_key_data["asset_limits"][0]["token_per_day"], @@ -24,7 +24,7 @@ def test_create_api_key_from_json(): request_per_minute=api_key_data["asset_limits"][0]["request_per_minute"], ) ], - global_limits=APIKeyGlobalLimits( + global_limits=APIKeyLimits( token_per_minute=api_key_data["global_limits"]["token_per_minute"], token_per_day=api_key_data["global_limits"]["token_per_day"], request_per_day=api_key_data["global_limits"]["request_per_day"], @@ -60,8 +60,8 @@ def test_create_api_key_from_dict(): api_key_name = "Test API Key" api_key = APIKeyFactory.create( name=api_key_name, - asset_limits=[APIKeyGlobalLimits(**limit) for limit in api_key_dict["asset_limits"]], - global_limits=APIKeyGlobalLimits(**api_key_dict["global_limits"]), + asset_limits=[APIKeyLimits(**limit) for limit in api_key_dict["asset_limits"]], + global_limits=APIKeyLimits(**api_key_dict["global_limits"]), budget=api_key_dict["budget"], expires_at=datetime.strptime(api_key_dict["expires_at"], "%Y-%m-%dT%H:%M:%SZ"), ) @@ -92,8 +92,8 @@ def test_create_update_api_key_from_dict(): api_key_name = "Test API Key" api_key = APIKeyFactory.create( name=api_key_name, - asset_limits=[APIKeyGlobalLimits(**limit) for limit in 
api_key_dict["asset_limits"]], - global_limits=APIKeyGlobalLimits(**api_key_dict["global_limits"]), + asset_limits=[APIKeyLimits(**limit) for limit in api_key_dict["asset_limits"]], + global_limits=APIKeyLimits(**api_key_dict["global_limits"]), budget=api_key_dict["budget"], expires_at=datetime.strptime(api_key_dict["expires_at"], "%Y-%m-%dT%H:%M:%SZ"), ) @@ -102,6 +102,11 @@ def test_create_update_api_key_from_dict(): assert api_key.id != "" assert api_key.name == api_key_name + api_key_ = APIKeyFactory.get(api_key=api_key.access_key) + assert isinstance(api_key_, APIKey) + assert api_key_.id != "" + assert api_key_.name == api_key_name + api_key.global_limits.token_per_day = 222 api_key.global_limits.token_per_minute = 222 api_key.global_limits.request_per_day = 222 @@ -134,7 +139,9 @@ def test_list_api_keys(): if api_key.is_admin is False: usage = api_key.get_usage() - assert isinstance(usage, APIKeyUsageLimit) + assert isinstance(usage, list) + if len(usage) > 0: + assert isinstance(usage[0], APIKeyUsageLimit) def test_list_update_api_keys(): @@ -149,7 +156,7 @@ def test_list_update_api_keys(): number = randint(0, 10000) if api_key.global_limits is None: - api_key.global_limits = APIKeyGlobalLimits( + api_key.global_limits = APIKeyLimits( token_per_minute=number, token_per_day=number, request_per_day=number, @@ -166,7 +173,7 @@ def test_list_update_api_keys(): if len(api_key.asset_limits) == 0: api_key.asset_limits.append( - APIKeyGlobalLimits( + APIKeyLimits( model="640b517694bf816d35a59125", token_per_minute=number, token_per_day=number, diff --git a/tests/unit/api_key_test.py b/tests/unit/api_key_test.py index 60d2371d..7da4e082 100644 --- a/tests/unit/api_key_test.py +++ b/tests/unit/api_key_test.py @@ -1,5 +1,5 @@ __author__ = "aixplain" -from aixplain.modules import APIKeyGlobalLimits +from aixplain.modules import APIKeyLimits from datetime import datetime import requests_mock import aixplain.utils.config as config @@ -13,7 +13,7 @@ def read_data(data_path): def test_api_key_service(): with requests_mock.Mocker() as mock: - model_id = "640b517694bf816d35a59125" + model_id = "test_asset_id" model_url = f"{config.BACKEND_URL}/sdk/models/{model_id}" model_map = read_data("tests/unit/mock_responses/model_response.json") mock.get(model_url, json=model_map) @@ -25,7 +25,7 @@ def test_api_key_service(): "accessKey": "access-key", "budget": 1000, "globalLimits": {"tpm": 100, "tpd": 1000, "rpd": 1000, "rpm": 100}, - "assetLimits": [{"assetId": model_id, "tpm": 100, "tpd": 1000, "rpd": 1000, "rpm": 100}], + "assetsLimits": [{"assetId": model_id, "tpm": 100, "tpd": 1000, "rpd": 1000, "rpm": 100}], "expiresAt": "2024-10-07T00:00:00Z", "isAdmin": False, } @@ -34,13 +34,11 @@ def test_api_key_service(): api_key = APIKeyFactory.create( name="Test API Key", asset_limits=[ - APIKeyGlobalLimits( + APIKeyLimits( model=model_id, token_per_minute=100, token_per_day=1000, request_per_day=1000, request_per_minute=100 ) ], - global_limits=APIKeyGlobalLimits( - token_per_minute=100, token_per_day=1000, request_per_day=1000, request_per_minute=100 - ), + global_limits=APIKeyLimits(token_per_minute=100, token_per_day=1000, request_per_day=1000, request_per_minute=100), budget=1000, expires_at=datetime(2024, 10, 7), ) @@ -65,3 +63,58 @@ def test_api_key_service(): mock.delete(delete_url, status_code=200) api_key.delete() + + +def test_setters(): + with requests_mock.Mocker() as mock: + model_id = "test_asset_id" + model_url = f"{config.BACKEND_URL}/sdk/models/{model_id}" + model_map = 
read_data("tests/unit/mock_responses/model_response.json") + mock.get(model_url, json=model_map) + + create_url = f"{config.BACKEND_URL}/sdk/api-keys" + api_key_response = { + "id": "key-id", + "name": "Name", + "accessKey": "access-key", + "budget": 1000, + "globalLimits": {"tpm": 100, "tpd": 1000, "rpd": 1000, "rpm": 100}, + "assetsLimits": [{"assetId": model_id, "tpm": 100, "tpd": 1000, "rpd": 1000, "rpm": 100}], + "expiresAt": "2024-10-07T00:00:00Z", + "isAdmin": False, + } + mock.post(create_url, json=api_key_response) + + api_key = APIKeyFactory.create( + name="Test API Key", + asset_limits=[ + APIKeyLimits( + model=model_id, token_per_minute=100, token_per_day=1000, request_per_day=1000, request_per_minute=100 + ) + ], + global_limits=APIKeyLimits(token_per_minute=100, token_per_day=1000, request_per_day=1000, request_per_minute=100), + budget=1000, + expires_at=datetime(2024, 10, 7), + ) + + api_key.set_token_per_day(1) + api_key.set_token_per_minute(1) + api_key.set_request_per_day(1) + api_key.set_request_per_minute(1) + api_key.set_token_per_day(1, model_id) + api_key.set_token_per_minute(1, model_id) + api_key.set_request_per_day(1, model_id) + api_key.set_request_per_minute(1, model_id) + + assert api_key.asset_limits[0].token_per_day == 1 + assert api_key.asset_limits[0].token_per_minute == 1 + assert api_key.asset_limits[0].request_per_day == 1 + assert api_key.asset_limits[0].request_per_minute == 1 + assert api_key.global_limits.token_per_day == 1 + assert api_key.global_limits.token_per_minute == 1 + assert api_key.global_limits.request_per_day == 1 + assert api_key.global_limits.request_per_minute == 1 + + +if __name__ == "__main__": + test_setters() diff --git a/tests/unit/llm_test.py b/tests/unit/llm_test.py index b0dbe19a..54887950 100644 --- a/tests/unit/llm_test.py +++ b/tests/unit/llm_test.py @@ -14,25 +14,25 @@ [ ( 401, - "Unauthorized API key: Please verify the spelling of the API key and its current validity. Details: {'error': 'An unspecified error occurred while processing your request.'}", + "Unauthorized API key: Please verify the spelling of the API key and its current validity. Details: An unspecified error occurred while processing your request.", ), ( 465, - "Subscription-related error: Please ensure that your subscription is active and has not expired. Details: {'error': 'An unspecified error occurred while processing your request.'}", + "Subscription-related error: Please ensure that your subscription is active and has not expired. Details: An unspecified error occurred while processing your request.", ), ( 475, - "Billing-related error: Please ensure you have enough credits to run this model. Details: {'error': 'An unspecified error occurred while processing your request.'}", + "Billing-related error: Please ensure you have enough credits to run this model. Details: An unspecified error occurred while processing your request.", ), ( 485, - "Supplier-related error: Please ensure that the selected supplier provides the model you are trying to access. Details: {'error': 'An unspecified error occurred while processing your request.'}", + "Supplier-related error: Please ensure that the selected supplier provides the model you are trying to access. Details: An unspecified error occurred while processing your request.", ), ( 495, - "Validation-related error: Please ensure all required fields are provided and correctly formatted. 
Details: {'error': 'An unspecified error occurred while processing your request.'}", + "An unspecified error occurred while processing your request.", ), - (501, "Status 501 - Unspecified error: {'error': 'An unspecified error occurred while processing your request.'}"), + (501, "Status 501 - Unspecified error: An unspecified error occurred while processing your request."), ], ) def test_run_async_errors(status_code, error_message): diff --git a/tests/unit/model_test.py b/tests/unit/model_test.py index 0907b8f1..03dccdbe 100644 --- a/tests/unit/model_test.py +++ b/tests/unit/model_test.py @@ -114,25 +114,25 @@ def test_failed_poll(): [ ( 401, - "Unauthorized API key: Please verify the spelling of the API key and its current validity. Details: {'error': 'An unspecified error occurred while processing your request.'}", + "Unauthorized API key: Please verify the spelling of the API key and its current validity. Details: An unspecified error occurred while processing your request.", ), ( 465, - "Subscription-related error: Please ensure that your subscription is active and has not expired. Details: {'error': 'An unspecified error occurred while processing your request.'}", + "Subscription-related error: Please ensure that your subscription is active and has not expired. Details: An unspecified error occurred while processing your request.", ), ( 475, - "Billing-related error: Please ensure you have enough credits to run this model. Details: {'error': 'An unspecified error occurred while processing your request.'}", + "Billing-related error: Please ensure you have enough credits to run this model. Details: An unspecified error occurred while processing your request.", ), ( 485, - "Supplier-related error: Please ensure that the selected supplier provides the model you are trying to access. Details: {'error': 'An unspecified error occurred while processing your request.'}", + "Supplier-related error: Please ensure that the selected supplier provides the model you are trying to access. Details: An unspecified error occurred while processing your request.", ), ( 495, - "Validation-related error: Please ensure all required fields are provided and correctly formatted. 
Details: {'error': 'An unspecified error occurred while processing your request.'}", + "An unspecified error occurred while processing your request.", ), - (501, "Status 501 - Unspecified error: {'error': 'An unspecified error occurred while processing your request.'}"), + (501, "Status 501 - Unspecified error: An unspecified error occurred while processing your request."), ], ) def test_run_async_errors(status_code, error_message): From 349ea6066da31eae7358976c01466679b234b2d7 Mon Sep 17 00:00:00 2001 From: kadirpekel Date: Fri, 25 Oct 2024 22:27:57 +0300 Subject: [PATCH 053/105] Bug 149 - Path validation removed and decision node output param handling revisited (#260) --- aixplain/factories/pipeline_factory/utils.py | 17 ++++++++++++++++- aixplain/modules/pipeline/designer/base.py | 17 ++++++++++++++--- aixplain/modules/pipeline/designer/nodes.py | 10 +++++++--- 3 files changed, 37 insertions(+), 7 deletions(-) diff --git a/aixplain/factories/pipeline_factory/utils.py b/aixplain/factories/pipeline_factory/utils.py index 9584863f..7911c370 100644 --- a/aixplain/factories/pipeline_factory/utils.py +++ b/aixplain/factories/pipeline_factory/utils.py @@ -86,8 +86,23 @@ def build_from_response(response: Dict, load_architecture: bool = False) -> Pipe node.label = node_json["label"] pipeline.add_node(node) + # Decision nodes' output parameters are defined based on their + # input parameters linked. So here we have to make sure that + # decision nodes (having passthrough parameter) should be first + # linked + link_jsons = response["links"][:] + decision_links = [] + for link_json in link_jsons: + for pm in link_json["paramMapping"]: + if pm["to"] == "passthrough": + decision_link_index = link_jsons.index(link_json) + decision_link = link_jsons.pop(decision_link_index) + decision_links.append(decision_link) + + link_jsons = decision_links + link_jsons + # instantiating links - for link_json in response["links"]: + for link_json in link_jsons: for param_mapping in link_json["paramMapping"]: link = Link( from_node=pipeline.get_node(link_json["from"]), diff --git a/aixplain/modules/pipeline/designer/base.py b/aixplain/modules/pipeline/designer/base.py index 76e6196d..a7873ec4 100644 --- a/aixplain/modules/pipeline/designer/base.py +++ b/aixplain/modules/pipeline/designer/base.py @@ -142,14 +142,25 @@ def __init__( pipeline: "DesignerPipeline" = None, ): - assert from_param in from_node.outputs, "Invalid from param" - assert to_param in to_node.inputs, "Invalid to param" - if isinstance(from_param, Param): from_param = from_param.code if isinstance(to_param, Param): to_param = to_param.code + assert from_param in from_node.outputs, \ + "Invalid from param. 
"\ + "Make sure all input params are already linked accordingly" + + fp_instance = from_node.outputs[from_param] + from .nodes import Decision + if (isinstance(to_node, Decision) and + to_param == to_node.inputs.passthrough.code): + to_node.outputs.create_param(from_param, + fp_instance.data_type, + is_required=fp_instance.is_required) + + assert to_param in to_node.inputs, "Invalid to param" + self.from_node = from_node self.to_node = to_node self.from_param = from_param diff --git a/aixplain/modules/pipeline/designer/nodes.py b/aixplain/modules/pipeline/designer/nodes.py index a6879e04..70ff302f 100644 --- a/aixplain/modules/pipeline/designer/nodes.py +++ b/aixplain/modules/pipeline/designer/nodes.py @@ -288,11 +288,15 @@ def __init__(self, value: DataType, path: List[Union[Node, int]], operation: Ope self.operation = operation self.type = type - if not self.path: - raise ValueError("Path is not valid, should be a list of nodes") + # Path can be an empty list in case the user has a valid case + # if not self.path: + # raise ValueError("Path is not valid, should be a list of nodes") # convert nodes to node numbers if they are nodes - self.path = [node.number if isinstance(node, Node) else node for node in self.path] + self.path = [ + node.number if isinstance(node, Node) else node + for node in self.path + ] def serialize(self) -> dict: return { From dd46dcf2208fb3ed955efc2181c4b43df1d660e1 Mon Sep 17 00:00:00 2001 From: Zaina Abu Shaban Date: Tue, 29 Oct 2024 14:59:00 +0300 Subject: [PATCH 054/105] Changed function to required field (#283) * Changed function to required feild * added function assert to list * fixed list function errors * Fixing the listing tests --------- Co-authored-by: Thiago Castro Ferreira --- aixplain/factories/model_factory.py | 4 ++-- .../general_assets/asset_functional_test.py | 11 +++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/aixplain/factories/model_factory.py b/aixplain/factories/model_factory.py index 5df7c924..fc64f1a7 100644 --- a/aixplain/factories/model_factory.py +++ b/aixplain/factories/model_factory.py @@ -221,8 +221,8 @@ def _get_assets_from_page( @classmethod def list( cls, + function: Function, query: Optional[Text] = "", - function: Optional[Function] = None, suppliers: Optional[Union[Supplier, List[Supplier]]] = None, source_languages: Optional[Union[Language, List[Language]]] = None, target_languages: Optional[Union[Language, List[Language]]] = None, @@ -236,7 +236,7 @@ def list( """Gets the first k given models based on the provided task and language filters Args: - function (Optional[Function], optional): function filter. Defaults to None. + function (Function): function filter. source_languages (Optional[Union[Language, List[Language]]], optional): language filter of input data. Defaults to None. target_languages (Optional[Union[Language, List[Language]]], optional): language filter of output data. Defaults to None. is_finetunable (Optional[bool], optional): can be finetuned or not. Defaults to None. 
diff --git a/tests/functional/general_assets/asset_functional_test.py b/tests/functional/general_assets/asset_functional_test.py index b0d8f6ef..266b04ea 100644 --- a/tests/functional/general_assets/asset_functional_test.py +++ b/tests/functional/general_assets/asset_functional_test.py @@ -33,7 +33,10 @@ def __get_asset_factory(asset_name): @pytest.mark.parametrize("asset_name", ["model", "dataset", "metric"]) def test_list(asset_name): AssetFactory = __get_asset_factory(asset_name) - asset_list = AssetFactory.list() + if asset_name == "model": + asset_list = AssetFactory.list(function=Function.TRANSLATION) + else: + asset_list = AssetFactory.list() assert asset_list["page_total"] == len(asset_list["results"]) @@ -62,7 +65,7 @@ def test_model_function(): def test_model_supplier(): desired_suppliers = [Supplier.GOOGLE] - models = ModelFactory.list(suppliers=desired_suppliers)["results"] + models = ModelFactory.list(suppliers=desired_suppliers, function=Function.TRANSLATION)["results"] for model in models: assert model.supplier.value in [desired_supplier.value for desired_supplier in desired_suppliers] @@ -89,14 +92,14 @@ def test_model_sort(): def test_model_ownership(): - models = ModelFactory.list(ownership=OwnershipType.SUBSCRIBED)["results"] + models = ModelFactory.list(ownership=OwnershipType.SUBSCRIBED, function=Function.TRANSLATION)["results"] for model in models: assert model.is_subscribed is True def test_model_query(): query = "Mongo" - models = ModelFactory.list(query=query)["results"] + models = ModelFactory.list(query=query, function=Function.TRANSLATION)["results"] for model in models: assert query in model.name From b86d5e74060a99ebb5633b51ee3eea7b9e45fa7d Mon Sep 17 00:00:00 2001 From: kadirpekel Date: Tue, 29 Oct 2024 14:59:19 +0300 Subject: [PATCH 055/105] BUG-206: Fixed passthrough parameter reflection to next node (#284) --- aixplain/modules/pipeline/designer/base.py | 31 +++++++++++++--------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/aixplain/modules/pipeline/designer/base.py b/aixplain/modules/pipeline/designer/base.py index a7873ec4..49c68463 100644 --- a/aixplain/modules/pipeline/designer/base.py +++ b/aixplain/modules/pipeline/designer/base.py @@ -147,17 +147,26 @@ def __init__( if isinstance(to_param, Param): to_param = to_param.code - assert from_param in from_node.outputs, \ - "Invalid from param. "\ + assert from_param in from_node.outputs, ( + "Invalid from param. " "Make sure all input params are already linked accordingly" + ) fp_instance = from_node.outputs[from_param] from .nodes import Decision - if (isinstance(to_node, Decision) and - to_param == to_node.inputs.passthrough.code): - to_node.outputs.create_param(from_param, - fp_instance.data_type, - is_required=fp_instance.is_required) + + if ( + isinstance(to_node, Decision) + and to_param == to_node.inputs.passthrough.code + ): + if from_param not in to_node.outputs: + to_node.outputs.create_param( + from_param, + fp_instance.data_type, + is_required=fp_instance.is_required, + ) + else: + to_node.outputs[from_param].data_type = fp_instance.data_type assert to_param in to_node.inputs, "Invalid to param" @@ -244,9 +253,7 @@ def __init__(self, node: "Node", *args, **kwargs): def add_param(self, param: Param) -> None: # check if param already registered if param in self: - raise ValueError( - f"Parameter with code '{param.code}' already exists." 
- ) + raise ValueError(f"Parameter with code '{param.code}' already exists.") self._params.append(param) # also set attribute on the node dynamically if there's no # any attribute with the same name @@ -364,9 +371,7 @@ def attach_to(self, pipeline: "DesignerPipeline"): :param pipeline: the pipeline """ assert not self.pipeline, "Node already attached to a pipeline" - assert ( - self not in pipeline.nodes - ), "Node already attached to a pipeline" + assert self not in pipeline.nodes, "Node already attached to a pipeline" assert self.type, "Node type not set" self.pipeline = pipeline From 0dbfab1f20b83027f72f66b6977b2ee8fae3e05c Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Tue, 29 Oct 2024 09:41:14 -0300 Subject: [PATCH 056/105] Get model description (#286) --- aixplain/factories/model_factory.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aixplain/factories/model_factory.py b/aixplain/factories/model_factory.py index fc64f1a7..209ff75d 100644 --- a/aixplain/factories/model_factory.py +++ b/aixplain/factories/model_factory.py @@ -80,6 +80,7 @@ def _create_model_from_response(cls, response: Dict) -> Model: return ModelClass( response["id"], response["name"], + description=response.get("description", ""), supplier=response["supplier"], api_key=response["api_key"], cost=response["pricing"], From 7ce3c734399e80dd205122177712d3167fc8e65a Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Tue, 29 Oct 2024 12:33:06 -0300 Subject: [PATCH 057/105] Fixing default parameters setting (#288) --- aixplain/modules/model/__init__.py | 4 ++-- aixplain/modules/model/llm_model.py | 4 ++-- aixplain/modules/model/utils.py | 2 +- tests/functional/model/run_model_test.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/aixplain/modules/model/__init__.py b/aixplain/modules/model/__init__.py index 441027d4..765960d4 100644 --- a/aixplain/modules/model/__init__.py +++ b/aixplain/modules/model/__init__.py @@ -188,7 +188,7 @@ def run( data: Union[Text, Dict], name: Text = "model_process", timeout: float = 300, - parameters: Dict = {}, + parameters: Optional[Dict] = {}, wait_time: float = 0.5, ) -> Dict: """Runs a model call. @@ -220,7 +220,7 @@ def run( response = {"status": "FAILED", "error": msg, "elapsed_time": end - start} return response - def run_async(self, data: Union[Text, Dict], name: Text = "model_process", parameters: Dict = {}) -> Dict: + def run_async(self, data: Union[Text, Dict], name: Text = "model_process", parameters: Optional[Dict] = {}) -> Dict: """Runs asynchronously a model call. Args: diff --git a/aixplain/modules/model/llm_model.py b/aixplain/modules/model/llm_model.py index 876b24dc..f48a3068 100644 --- a/aixplain/modules/model/llm_model.py +++ b/aixplain/modules/model/llm_model.py @@ -102,7 +102,7 @@ def run( top_p: float = 1.0, name: Text = "model_process", timeout: float = 300, - parameters: Dict = {}, + parameters: Optional[Dict] = {}, wait_time: float = 0.5, ) -> Dict: """Synchronously running a Large Language Model (LLM) model. @@ -160,7 +160,7 @@ def run_async( max_tokens: int = 128, top_p: float = 1.0, name: Text = "model_process", - parameters: Dict = {}, + parameters: Optional[Dict] = {}, ) -> Dict: """Runs asynchronously a model call. 
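(Aside: a hedged sketch of the `parameters` change above, not part of the patch. The model ID is a placeholder, and the "context" key simply mirrors the `build_payload` unit test earlier in this series, which merges `parameters` into the request payload.)

from aixplain.factories import ModelFactory

model = ModelFactory.get("<model-id>")  # placeholder ID
# `parameters` is Optional and defaults to {}; both calls below are valid.
result = model.run(data="This is a test sentence.")
result_with_params = model.run(data="This is a test sentence.", parameters={"context": "context_data"})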
diff --git a/aixplain/modules/model/utils.py b/aixplain/modules/model/utils.py index d29da68b..2235b35a 100644 --- a/aixplain/modules/model/utils.py +++ b/aixplain/modules/model/utils.py @@ -57,7 +57,7 @@ def call_run_endpoint(url: Text, api_key: Text, payload: Dict) -> Dict: else: response = resp else: - resp = resp["error"] if "error" in resp else resp + resp = resp["error"] if isinstance(resp, dict) and "error" in resp else resp if r.status_code == 401: error = f"Unauthorized API key: Please verify the spelling of the API key and its current validity. Details: {resp}" elif 460 <= r.status_code < 470: diff --git a/tests/functional/model/run_model_test.py b/tests/functional/model/run_model_test.py index d5c1d6ac..0e97a2ab 100644 --- a/tests/functional/model/run_model_test.py +++ b/tests/functional/model/run_model_test.py @@ -12,7 +12,7 @@ def pytest_generate_tests(metafunc): four_weeks_ago = datetime.now(timezone.utc) - timedelta(weeks=4) models = ModelFactory.list(function=Function.TEXT_GENERATION)["results"] - predefined_models = ["Groq Llama 3 70B", "Chat GPT 3.5", "GPT-4o", "GPT 4 (32k)"] + predefined_models = ["Groq Llama 3 70B", "Chat GPT 3.5", "GPT-4o"] recent_models = [model for model in models if model.created_at and model.created_at >= four_weeks_ago] combined_models = recent_models + [ ModelFactory.list(query=model, function=Function.TEXT_GENERATION)["results"][0] for model in predefined_models From 09908eda2067c0648fd15158468ad1351bd3ea22 Mon Sep 17 00:00:00 2001 From: Zaina Abu Shaban Date: Thu, 31 Oct 2024 00:25:27 +0300 Subject: [PATCH 058/105] Eng 893 ai xplain sdk improve error log message when deleting an agent (#290) * Error message update * Error message update * added try catch --- aixplain/modules/agent/__init__.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/aixplain/modules/agent/__init__.py b/aixplain/modules/agent/__init__.py index a7586c8b..41bb0a2e 100644 --- a/aixplain/modules/agent/__init__.py +++ b/aixplain/modules/agent/__init__.py @@ -252,6 +252,10 @@ def delete(self) -> None: if r.status_code != 200: raise Exception() except Exception: - message = f"Agent Deletion Error (HTTP {r.status_code}): Make sure the agent exists and you are the owner." + try: + response_json = r.json() + message = f"Agent Deletion Error (HTTP {r.status_code}): {response_json.get('message')}." + except ValueError: + message = f"Agent Deletion Error (HTTP {r.status_code}): There was an error in deleting the agent." logging.error(message) - raise Exception(f"{message}") + raise Exception(message) \ No newline at end of file From c3c02284326c434d9dfb463a23a5c46471d65a6d Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Wed, 30 Oct 2024 18:49:43 -0300 Subject: [PATCH 059/105] Fixes of errors pointed by functional test (#291) --- pyproject.toml | 2 +- tests/functional/apikey/test_api.py | 56 ------------------------ tests/functional/model/run_model_test.py | 15 ++++--- 3 files changed, 11 insertions(+), 62 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1f034299..1656947a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ namespaces = true [project] name = "aiXplain" -version = "0.2.21rc1" +version = "0.2.21" description = "aiXplain SDK adds AI functions to software." 
readme = "README.md" requires-python = ">=3.5, <4" diff --git a/tests/functional/apikey/test_api.py b/tests/functional/apikey/test_api.py index 221a58fb..2c228f6b 100644 --- a/tests/functional/apikey/test_api.py +++ b/tests/functional/apikey/test_api.py @@ -199,62 +199,6 @@ def test_list_update_api_keys(): break - -def test_list_update_api_keys(): - api_keys = APIKeyFactory.list() - assert isinstance(api_keys, list) - - for api_key in api_keys: - assert isinstance(api_key, APIKey) - assert api_key.id != "" - - from random import randint - - number = randint(0, 10000) - if api_key.global_limits is None: - api_key.global_limits = APIKeyGlobalLimits( - token_per_minute=number, - token_per_day=number, - request_per_day=number, - request_per_minute=number, - ) - else: - api_key.global_limits.token_per_day = number - api_key.global_limits.token_per_minute = number - api_key.global_limits.request_per_day = number - api_key.global_limits.request_per_minute = number - - if api_key.asset_limits is None: - api_key.asset_limits = [] - - if len(api_key.asset_limits) == 0: - api_key.asset_limits.append( - APIKeyGlobalLimits( - model="640b517694bf816d35a59125", - token_per_minute=number, - token_per_day=number, - request_per_day=number, - request_per_minute=number, - ) - ) - else: - api_key.asset_limits[0].request_per_day = number - api_key.asset_limits[0].request_per_minute = number - api_key.asset_limits[0].token_per_day = number - api_key.asset_limits[0].token_per_minute = number - api_key = APIKeyFactory.update(api_key) - - assert api_key.global_limits.token_per_day == number - assert api_key.global_limits.token_per_minute == number - assert api_key.global_limits.request_per_day == number - assert api_key.global_limits.request_per_minute == number - assert api_key.asset_limits[0].request_per_day == number - assert api_key.asset_limits[0].request_per_minute == number - assert api_key.asset_limits[0].token_per_day == number - assert api_key.asset_limits[0].token_per_minute == number - break - - def test_create_api_key_wrong_input(): api_key_name = "Test API Key" diff --git a/tests/functional/model/run_model_test.py b/tests/functional/model/run_model_test.py index 0e97a2ab..04335d19 100644 --- a/tests/functional/model/run_model_test.py +++ b/tests/functional/model/run_model_test.py @@ -12,11 +12,17 @@ def pytest_generate_tests(metafunc): four_weeks_ago = datetime.now(timezone.utc) - timedelta(weeks=4) models = ModelFactory.list(function=Function.TEXT_GENERATION)["results"] - predefined_models = ["Groq Llama 3 70B", "Chat GPT 3.5", "GPT-4o"] + predefined_models = [] + for predefined_model in ["Groq Llama 3 70B", "Chat GPT 3.5", "GPT-4o"]: + predefined_models.extend( + [ + m + for m in ModelFactory.list(query=predefined_model, function=Function.TEXT_GENERATION)["results"] + if m.name == predefined_model and "aiXplain-testing" not in str(m.supplier) + ] + ) recent_models = [model for model in models if model.created_at and model.created_at >= four_weeks_ago] - combined_models = recent_models + [ - ModelFactory.list(query=model, function=Function.TEXT_GENERATION)["results"][0] for model in predefined_models - ] + combined_models = recent_models + predefined_models metafunc.parametrize("llm_model", combined_models) @@ -24,7 +30,6 @@ def test_llm_run(llm_model): """Testing LLMs with history context""" assert isinstance(llm_model, LLM) - response = llm_model.run( data="What is my name?", history=[{"role": "user", "content": "Hello! 
My name is Thiago."}, {"role": "assistant", "content": "Hello!"}], From ef5d61f0977d571833f1e6f8af6b1640424ec805 Mon Sep 17 00:00:00 2001 From: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Date: Thu, 31 Oct 2024 14:15:16 -0300 Subject: [PATCH 060/105] Fix agent and team agent functional tests (#294) * Fix agent and team agent functional tests * Address PR comment --------- Co-authored-by: Lucas Pavanelli --- aixplain/modules/agent/__init__.py | 5 +-- .../functional/agent/agent_functional_test.py | 34 +++++++++++++++++-- .../team_agent/team_agent_functional_test.py | 19 +++++++++-- 3 files changed, 51 insertions(+), 7 deletions(-) diff --git a/aixplain/modules/agent/__init__.py b/aixplain/modules/agent/__init__.py index 41bb0a2e..3f892723 100644 --- a/aixplain/modules/agent/__init__.py +++ b/aixplain/modules/agent/__init__.py @@ -249,13 +249,14 @@ def delete(self) -> None: headers = {"x-api-key": config.TEAM_API_KEY, "Content-Type": "application/json"} logging.debug(f"Start service for DELETE Agent - {url} - {headers}") r = _request_with_retry("delete", url, headers=headers) + logging.debug(f"Result of request for DELETE Agent - {r.status_code}") if r.status_code != 200: raise Exception() except Exception: try: response_json = r.json() - message = f"Agent Deletion Error (HTTP {r.status_code}): {response_json.get('message')}." + message = f"Agent Deletion Error (HTTP {r.status_code}): {response_json.get('message', '').strip('{{}}')}." except ValueError: message = f"Agent Deletion Error (HTTP {r.status_code}): There was an error in deleting the agent." logging.error(message) - raise Exception(message) \ No newline at end of file + raise Exception(message) diff --git a/tests/functional/agent/agent_functional_test.py b/tests/functional/agent/agent_functional_test.py index 648f4f28..478b23f3 100644 --- a/tests/functional/agent/agent_functional_test.py +++ b/tests/functional/agent/agent_functional_test.py @@ -19,7 +19,7 @@ from dotenv import load_dotenv load_dotenv() -from aixplain.factories import AgentFactory +from aixplain.factories import AgentFactory, TeamAgentFactory from aixplain.enums.function import Function from aixplain.enums.supplier import Supplier @@ -36,11 +36,23 @@ def read_data(data_path): def run_input_map(request): return request.param +@pytest.fixture(scope="function") +def delete_agents_and_team_agents(): + for team_agent in TeamAgentFactory.list()["results"]: + team_agent.delete() + for agent in AgentFactory.list()["results"]: + agent.delete() + + yield True -def test_end2end(run_input_map): + for team_agent in TeamAgentFactory.list()["results"]: + team_agent.delete() for agent in AgentFactory.list()["results"]: agent.delete() + +def test_end2end(run_input_map, delete_agents_and_team_agents): + assert delete_agents_and_team_agents tools = [] if "model_tools" in run_input_map: for tool in run_input_map["model_tools"]: @@ -88,3 +100,21 @@ def test_fail_non_existent_llm(): tools=[AgentFactory.create_model_tool(function=Function.TRANSLATION)], ) assert str(exc_info.value) == "Large Language Model with ID 'non_existent_llm' not found." 
+
+def test_delete_agent_in_use(delete_agents_and_team_agents):
+    assert delete_agents_and_team_agents
+    agent = AgentFactory.create(
+        name="Test Agent",
+        description="Test description",
+        tools=[AgentFactory.create_model_tool(function=Function.TRANSLATION)],
+    )
+    TeamAgentFactory.create(
+        name="Test Team Agent",
+        agents=[agent],
+        description="Test description",
+        use_mentalist_and_inspector=True,
+    )
+
+    with pytest.raises(Exception) as exc_info:
+        agent.delete()
+    assert str(exc_info.value) == "Agent Deletion Error (HTTP 403): err.agent_is_in_use."
\ No newline at end of file
diff --git a/tests/functional/team_agent/team_agent_functional_test.py b/tests/functional/team_agent/team_agent_functional_test.py
index c7e50b68..c28b01da 100644
--- a/tests/functional/team_agent/team_agent_functional_test.py
+++ b/tests/functional/team_agent/team_agent_functional_test.py
@@ -31,15 +31,28 @@ def read_data(data_path):
     return json.load(open(data_path, "r"))
 
 
+@pytest.fixture(scope="function")
+def delete_agents_and_team_agents():
+    for team_agent in TeamAgentFactory.list()["results"]:
+        team_agent.delete()
+    for agent in AgentFactory.list()["results"]:
+        agent.delete()
+
+    yield True
+
+    for team_agent in TeamAgentFactory.list()["results"]:
+        team_agent.delete()
+    for agent in AgentFactory.list()["results"]:
+        agent.delete()
+
+
 @pytest.fixture(scope="module", params=read_data(RUN_FILE))
 def run_input_map(request):
     return request.param


-def test_end2end(run_input_map):
-    for agent in AgentFactory.list()["results"]:
-        agent.delete()
+def test_end2end(run_input_map, delete_agents_and_team_agents):
+    assert delete_agents_and_team_agents

     agents = []
     for agent in run_input_map["agents"]:
From 1e43ed3121a49c855d10cdf9b85735d75e1ce5aa Mon Sep 17 00:00:00 2001
From: kadirpekel
Date: Thu, 31 Oct 2024 23:23:57 +0300
Subject: [PATCH 061/105] designer pipeline building is now compatible with custom inputs (#296)

---
 aixplain/factories/pipeline_factory/utils.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/aixplain/factories/pipeline_factory/utils.py b/aixplain/factories/pipeline_factory/utils.py
index 7911c370..c9291031 100644
--- a/aixplain/factories/pipeline_factory/utils.py
+++ b/aixplain/factories/pipeline_factory/utils.py
@@ -82,6 +82,14 @@ def build_from_response(response: Dict, load_architecture: bool = False) -> Pipe
             for output_param in node_json["outputValues"]
             if output_param["code"] not in node.outputs
         ]
+        if "customInputs" in node_json:
+            for custom_input in node_json["customInputs"]:
+                node.inputs.create_param(
+                    data_type=custom_input.get("dataType"),
+                    code=custom_input["code"],
+                    value=custom_input.get("value"),
+                    is_required=custom_input.get("isRequired", False),
+                )
         node.number = node_json["number"]
         node.label = node_json["label"]
         pipeline.add_node(node)
From d77348ae670e19fbd3af5063f3f0ec56016e89e6 Mon Sep 17 00:00:00 2001
From: Zaina Abu Shaban
Date: Mon, 4 Nov 2024 16:10:25 +0300
Subject: [PATCH 062/105] Model Response Class (#279)

* Made changes

* Return the entire model serving object when a synchronous call succeeds.
* Updated tests for model response * fixed run async * Fixes in objectify responses * Added changes to llm * Syntax fix + unit test response assertion * Added run sync and poll tests * Improvements in unit tests * Add details field * Treat legacy for elapsed time and used credits model variables * __repr__ and get in ModelResponse --------- Co-authored-by: Thiago Castro Ferreira Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> --- aixplain/cli_groups.py | 27 +- aixplain/enums/__init__.py | 1 + aixplain/enums/asset_status.py | 33 +- aixplain/enums/data_subtype.py | 2 +- aixplain/enums/function.py | 2 +- aixplain/enums/model_status.py | 11 + aixplain/enums/storage_type.py | 2 +- aixplain/factories/cli/model_factory_cli.py | 79 +- aixplain/factories/file_factory.py | 2 +- .../factories/finetune_factory/__init__.py | 4 +- aixplain/factories/wallet_factory.py | 4 +- aixplain/modules/benchmark.py | 5 +- aixplain/modules/benchmark_job.py | 47 +- aixplain/modules/finetune/status.py | 1 + aixplain/modules/model/__init__.py | 63 +- aixplain/modules/model/llm_model.py | 31 +- aixplain/modules/model/response.py | 70 + aixplain/modules/pipeline/default.py | 1 - .../modules/pipeline/designer/__init__.py | 4 +- aixplain/modules/pipeline/designer/base.py | 12 +- aixplain/modules/pipeline/designer/mixins.py | 4 +- .../modules/pipeline/designer/pipeline.py | 36 +- aixplain/modules/pipeline/generate.py | 15 +- aixplain/modules/pipeline/pipeline.py | 1884 +++++++++-------- aixplain/modules/wallet.py | 2 +- aixplain/utils/__init__.py | 2 +- aixplain/utils/config.py | 2 +- aixplain/utils/convert_datatype_utils.py | 6 +- .../aixplain_diarization_streaming_client.py | 51 +- ...n_speech_transcription_streaming_client.py | 77 +- docs/streaming/make_audio_compatible.py | 12 +- tests/__init__.py | 2 +- .../benchmark/benchmark_functional_test.py | 12 +- .../functional/file_asset/file_create_test.py | 16 +- .../functional/model/image_upload_e2e_test.py | 4 +- .../model/image_upload_functional_test.py | 5 +- tests/test_utils.py | 4 +- tests/unit/finetune_test.py | 14 +- tests/unit/image_upload_test.py | 2 + tests/unit/llm_test.py | 42 + tests/unit/model_test.py | 109 +- 41 files changed, 1532 insertions(+), 1170 deletions(-) create mode 100644 aixplain/enums/model_status.py create mode 100644 aixplain/modules/model/response.py diff --git a/aixplain/cli_groups.py b/aixplain/cli_groups.py index ea5e28be..33192175 100644 --- a/aixplain/cli_groups.py +++ b/aixplain/cli_groups.py @@ -21,28 +21,43 @@ CLI Runner """ import click -from aixplain.factories.cli.model_factory_cli import list_host_machines, list_functions, create_asset_repo, asset_repo_login, onboard_model, deploy_huggingface_model, get_huggingface_model_status, list_gpus +from aixplain.factories.cli.model_factory_cli import ( + list_host_machines, + list_functions, + create_asset_repo, + asset_repo_login, + onboard_model, + deploy_huggingface_model, + get_huggingface_model_status, + list_gpus, +) -@click.group('cli') + +@click.group("cli") def cli(): pass -@click.group('list') + +@click.group("list") def list(): pass -@click.group('get') + +@click.group("get") def get(): pass -@click.group('create') + +@click.group("create") def create(): pass -@click.group('onboard') + +@click.group("onboard") def onboard(): pass + cli.add_command(list) cli.add_command(get) cli.add_command(create) diff --git a/aixplain/enums/__init__.py b/aixplain/enums/__init__.py index 4da09643..d66facce 100644 --- a/aixplain/enums/__init__.py +++ 
b/aixplain/enums/__init__.py @@ -13,3 +13,4 @@ from .supplier import Supplier from .sort_by import SortBy from .sort_order import SortOrder +from .model_status import ModelStatus diff --git a/aixplain/enums/asset_status.py b/aixplain/enums/asset_status.py index 134af26e..9274001f 100644 --- a/aixplain/enums/asset_status.py +++ b/aixplain/enums/asset_status.py @@ -24,20 +24,21 @@ from enum import Enum from typing import Text + class AssetStatus(Text, Enum): - HIDDEN = 'hidden' - SCHEDULED = 'scheduled' - ONBOARDING = 'onboarding' - ONBOARDED = 'onboarded' - PENDING = 'pending' - FAILED = 'failed' - TRAINING = 'training' - REJECTED = 'rejected' - ENABLING = 'enabling' - DELETING = 'deleting' - DISABLED = 'disabled' - DELETED = 'deleted' - IN_PROGRESS = 'in_progress' - COMPLETED = 'completed' - CANCELING = 'canceling' - CANCELED = 'canceled' \ No newline at end of file + HIDDEN = "hidden" + SCHEDULED = "scheduled" + ONBOARDING = "onboarding" + ONBOARDED = "onboarded" + PENDING = "pending" + FAILED = "failed" + TRAINING = "training" + REJECTED = "rejected" + ENABLING = "enabling" + DELETING = "deleting" + DISABLED = "disabled" + DELETED = "deleted" + IN_PROGRESS = "in_progress" + COMPLETED = "completed" + CANCELING = "canceling" + CANCELED = "canceled" diff --git a/aixplain/enums/data_subtype.py b/aixplain/enums/data_subtype.py index 106b04cb..ef11cab5 100644 --- a/aixplain/enums/data_subtype.py +++ b/aixplain/enums/data_subtype.py @@ -32,6 +32,6 @@ class DataSubtype(Enum): RACE = "race" SPLIT = "split" TOPIC = "topic" - + def __str__(self): return self._value_ diff --git a/aixplain/enums/function.py b/aixplain/enums/function.py index 9a6f47d4..a6d2e40a 100644 --- a/aixplain/enums/function.py +++ b/aixplain/enums/function.py @@ -55,7 +55,7 @@ def load_functions(): if input_data_object["required"] is True }, "output": {output_data_object["dataType"] for output_data_object in function["output"]}, - "spec": function + "spec": function, } for function in resp["items"] } diff --git a/aixplain/enums/model_status.py b/aixplain/enums/model_status.py new file mode 100644 index 00000000..af4ae0a9 --- /dev/null +++ b/aixplain/enums/model_status.py @@ -0,0 +1,11 @@ +from enum import Enum +from typing import Text + + +class ModelStatus(Text, Enum): + FAILED = "FAILED" + IN_PROGRESS = "IN_PROGRESS" + SUCCESS = "SUCCESS" + + def __str__(self): + return self._value_ diff --git a/aixplain/enums/storage_type.py b/aixplain/enums/storage_type.py index dca35f29..672d0c67 100644 --- a/aixplain/enums/storage_type.py +++ b/aixplain/enums/storage_type.py @@ -30,4 +30,4 @@ class StorageType(Enum): FILE = "file" def __str__(self): - return self._value_ \ No newline at end of file + return self._value_ diff --git a/aixplain/factories/cli/model_factory_cli.py b/aixplain/factories/cli/model_factory_cli.py index b83d61cc..9c69ca4f 100644 --- a/aixplain/factories/cli/model_factory_cli.py +++ b/aixplain/factories/cli/model_factory_cli.py @@ -26,11 +26,11 @@ import click import yaml + @click.command("hosts") -@click.option("--api-key", default=None, - help="TEAM_API_KEY if not already set in environment") +@click.option("--api-key", default=None, help="TEAM_API_KEY if not already set in environment") def list_host_machines(api_key: Optional[Text] = None) -> None: - """CLI wrapper function for the LIST_HOST_MACHINES function in + """CLI wrapper function for the LIST_HOST_MACHINES function in ModelFactory. 
Args: @@ -43,16 +43,15 @@ def list_host_machines(api_key: Optional[Text] = None) -> None: ret_val_yaml = yaml.dump(ret_val) click.echo(ret_val_yaml) + @click.command("functions") -@click.option("--verbose", is_flag=True, - help="List all function details, False by default.") -@click.option("--api-key", default=None, - help="TEAM_API_KEY if not already set in environment.") +@click.option("--verbose", is_flag=True, help="List all function details, False by default.") +@click.option("--api-key", default=None, help="TEAM_API_KEY if not already set in environment.") def list_functions(verbose: bool, api_key: Optional[Text] = None) -> None: """CLI wrapper function for the LIST_FUNCTIONS function in ModelFactory. Args: - verbose (Boolean, optional): Set to True if a detailed response + verbose (Boolean, optional): Set to True if a detailed response is desired; is otherwise False by default. api_key (Text, optional): Team API key. Defaults to None. Returns: @@ -62,9 +61,9 @@ def list_functions(verbose: bool, api_key: Optional[Text] = None) -> None: ret_val_yaml = yaml.dump(ret_val) click.echo(ret_val_yaml) + @click.command("gpus") -@click.option("--api-key", default=None, - help="TEAM_API_KEY if not already set in environment.") +@click.option("--api-key", default=None, help="TEAM_API_KEY if not already set in environment.") def list_gpus(api_key: Optional[Text] = None) -> None: """CLI wrapper function for the LIST_GPUS function in ModelFactory. @@ -77,22 +76,28 @@ def list_gpus(api_key: Optional[Text] = None) -> None: ret_val_yaml = yaml.dump(ret_val) click.echo(ret_val_yaml) + @click.command("image-repo") @click.option("--name", help="Model name.") @click.option("--description", help="Description of model.") @click.option("--function", help="Function name obtained from LIST_FUNCTIONS.") -@click.option("--source-language", default="en", - help="Model source language in 2-character 639-1 code or 3-character 639-3 code.") +@click.option( + "--source-language", default="en", help="Model source language in 2-character 639-1 code or 3-character 639-3 code." +) @click.option("--input-modality", help="Input type (text, video, image, etc.)") @click.option("--output-modality", help="Output type (text, video, image, etc.)") @click.option("--documentation-url", default="", help="Link to model documentation.") -@click.option("--api-key", default=None, - help="TEAM_API_KEY if not already set in environment.") -def create_asset_repo(name: Text, description: Text, function: Text, - source_language: Text, input_modality: Text, - output_modality: Text, - documentation_url: Optional[Text] = "", - api_key: Optional[Text] = None) -> None: +@click.option("--api-key", default=None, help="TEAM_API_KEY if not already set in environment.") +def create_asset_repo( + name: Text, + description: Text, + function: Text, + source_language: Text, + input_modality: Text, + output_modality: Text, + documentation_url: Optional[Text] = "", + api_key: Optional[Text] = None, +) -> None: """CLI wrapper function for the CREATE_ASSET_REPO function in ModelFactory. 
Args: @@ -109,16 +114,15 @@ def create_asset_repo(name: Text, description: Text, function: Text, Returns: None """ - ret_val = ModelFactory.create_asset_repo(name, description, function, - source_language, input_modality, - output_modality, documentation_url, - api_key) + ret_val = ModelFactory.create_asset_repo( + name, description, function, source_language, input_modality, output_modality, documentation_url, api_key + ) ret_val_yaml = yaml.dump(ret_val) click.echo(ret_val_yaml) + @click.command("image-repo-login") -@click.option("--api-key", default=None, - help="TEAM_API_KEY if not already set in environment.") +@click.option("--api-key", default=None, help="TEAM_API_KEY if not already set in environment.") def asset_repo_login(api_key: Optional[Text] = None) -> None: """CLI wrapper function for the ASSET_REPO_LOGIN function in ModelFactory. @@ -132,15 +136,16 @@ def asset_repo_login(api_key: Optional[Text] = None) -> None: ret_val_yaml = yaml.dump(ret_val) click.echo(ret_val_yaml) + @click.command("model") @click.option("--model-id", help="Model ID from CREATE_IMAGE_REPO.") @click.option("--image-tag", help="The tag of the image that you would like hosted.") @click.option("--image-hash", help="The hash of the image you would like onboarded.") @click.option("--host-machine", default="", help="The machine on which to host the model.") @click.option("--api-key", default=None, help="TEAM_API_KEY if not already set in environment.") -def onboard_model(model_id: Text, image_tag: Text, image_hash: Text, - host_machine: Optional[Text] = "", - api_key: Optional[Text] = None) -> None: +def onboard_model( + model_id: Text, image_tag: Text, image_hash: Text, host_machine: Optional[Text] = "", api_key: Optional[Text] = None +) -> None: """CLI wrapper function for the ONBOARD_MODEL function in ModelFactory. Args: @@ -150,22 +155,25 @@ def onboard_model(model_id: Text, image_tag: Text, image_hash: Text, Returns: None - """ - ret_val = ModelFactory.onboard_model(model_id, image_tag, image_hash, - host_machine, api_key) + """ + ret_val = ModelFactory.onboard_model(model_id, image_tag, image_hash, host_machine, api_key) ret_val_yaml = yaml.dump(ret_val) click.echo(ret_val_yaml) + @click.command("hf-model") @click.option("--name", help="User-defined name for Hugging Face model.") @click.option("--hf-repo-id", help="Repository ID from Hugging Face in {supplier}/{model name} form.") @click.option("--revision", default="", help="Commit hash of repository.") @click.option("--hf-token", default=None, help="Hugging Face token used to authenticate to this model.") @click.option("--api-key", default=None, help="TEAM_API_KEY if not already set in environment.") -def deploy_huggingface_model(name: Text, hf_repo_id: Text, - hf_token: Optional[Text] = None, - revision: Optional[Text] = None, - api_key: Optional[Text] = None) -> None: +def deploy_huggingface_model( + name: Text, + hf_repo_id: Text, + hf_token: Optional[Text] = None, + revision: Optional[Text] = None, + api_key: Optional[Text] = None, +) -> None: """CLI wrapper function for the DEPLOY_HUGGINGFACE_MODEL function in ModelFactory. 
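These click wrappers are exercised through the command groups registered in cli_groups.py; a hedged sketch using click's test runner (the exact subcommand spelling, e.g. "list hosts", is an assumption based on the group and command names above):

from click.testing import CliRunner
from aixplain.cli_groups import cli

runner = CliRunner()
result = runner.invoke(cli, ["list", "hosts"])  # dispatches to list_host_machines
print(result.output)                            # YAML dump of available machines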
Args: @@ -179,6 +187,7 @@ def deploy_huggingface_model(name: Text, hf_repo_id: Text, ret_val_yaml = yaml.dump(ret_val) click.echo(ret_val_yaml) + @click.command("hf-model-status") @click.option("--model-id", help="Model ID from DEPLOY_HUGGINGFACE_MODEL.") @click.option("--api-key", default=None, help="TEAM_API_KEY if not already set in environment.") diff --git a/aixplain/factories/file_factory.py b/aixplain/factories/file_factory.py index adbebcd3..2085c75d 100644 --- a/aixplain/factories/file_factory.py +++ b/aixplain/factories/file_factory.py @@ -145,5 +145,5 @@ def create( """ assert ( license is not None if is_temp is False else True - ), "File Asset Creation Error: To upload a non-temporary file, you need to specify the `license`." + ), "File Asset Creation Error: To upload a non-temporary file, you need to specify the `license`." return cls.upload(local_path=local_path, tags=tags, license=license, is_temp=is_temp) diff --git a/aixplain/factories/finetune_factory/__init__.py b/aixplain/factories/finetune_factory/__init__.py index 7b05b759..7a23c527 100644 --- a/aixplain/factories/finetune_factory/__init__.py +++ b/aixplain/factories/finetune_factory/__init__.py @@ -88,11 +88,11 @@ def create( assert ( train_percentage + dev_percentage <= 100 ), f"Create FineTune: Train percentage + dev percentage ({train_percentage + dev_percentage}) must be less than or equal to one" - + for i, dataset in enumerate(dataset_list): if isinstance(dataset, str) is True: dataset_list[i] = DatasetFactory.get(dataset_id=dataset) - + if isinstance(model, str) is True: model = ModelFactory.get(model_id=model) diff --git a/aixplain/factories/wallet_factory.py b/aixplain/factories/wallet_factory.py index b36000f1..01c0ac2e 100644 --- a/aixplain/factories/wallet_factory.py +++ b/aixplain/factories/wallet_factory.py @@ -10,7 +10,7 @@ class WalletFactory: backend_url = config.BACKEND_URL @classmethod - def get(cls, api_key: Text = config.TEAM_API_KEY) -> Wallet: + def get(cls, api_key: Text = config.TEAM_API_KEY) -> Wallet: """Get wallet information""" try: resp = None @@ -22,7 +22,7 @@ def get(cls, api_key: Text = config.TEAM_API_KEY) -> Wallet: resp = r.json() total_balance = float(resp.get("totalBalance", 0.0)) reserved_balance = float(resp.get("reservedBalance", 0.0)) - + return Wallet(total_balance=total_balance, reserved_balance=reserved_balance) except Exception as e: raise Exception(f"Failed to get the wallet credit information. Error: {str(e)}") diff --git a/aixplain/modules/benchmark.py b/aixplain/modules/benchmark.py index 7a674e05..3f77cb07 100644 --- a/aixplain/modules/benchmark.py +++ b/aixplain/modules/benchmark.py @@ -56,7 +56,7 @@ def __init__( description: Text = "", supplier: Text = "aiXplain", version: Text = "1.0", - **additional_info + **additional_info, ) -> None: """Create a Benchmark with the necessary information. 
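The WalletFactory.get change above surfaces the wallet balances as typed floats; a short usage sketch (assumes TEAM_API_KEY is configured, and that available credit is simply total minus reserved, which is an interpretation rather than something the patch states):

from aixplain.factories.wallet_factory import WalletFactory

wallet = WalletFactory.get()
print(f"total: {wallet.total_balance}")
print(f"reserved: {wallet.reserved_balance}")
print(f"available: {wallet.total_balance - wallet.reserved_balance}")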
@@ -84,7 +84,6 @@ def __init__( def __repr__(self) -> str: return f"" - def start(self) -> BenchmarkJob: """Starts a new benchmark job(run) for the current benchmark @@ -104,4 +103,4 @@ def start(self) -> BenchmarkJob: except Exception as e: error_message = f"Starting Benchmark Job: Error in Creating Benchmark {benhchmark_id} : {e}" logging.error(error_message, exc_info=True) - return None \ No newline at end of file + return None diff --git a/aixplain/modules/benchmark_job.py b/aixplain/modules/benchmark_job.py index 7dae2d96..c06063fc 100644 --- a/aixplain/modules/benchmark_job.py +++ b/aixplain/modules/benchmark_job.py @@ -7,6 +7,7 @@ from pathlib import Path from aixplain.utils.file_utils import _request_with_retry, save_file + class BenchmarkJob: """Benchmark Job Represents a single run of an already created Benchmark. @@ -35,29 +36,29 @@ def __init__(self, id: Text, status: Text, benchmark_id: Text, **additional_info @classmethod def _create_benchmark_job_from_response(cls, response: Dict): return BenchmarkJob(response["jobId"], response["status"], response["benchmark"]["id"]) - + @classmethod def _fetch_current_response(cls, job_id: Text) -> dict: url = urljoin(config.BACKEND_URL, f"sdk/benchmarks/jobs/{job_id}") - if config.AIXPLAIN_API_KEY != "": + if config.AIXPLAIN_API_KEY != "": headers = {"x-aixplain-key": f"{config.AIXPLAIN_API_KEY}", "Content-Type": "application/json"} else: headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} r = _request_with_retry("get", url, headers=headers) resp = r.json() return resp - + def _update_from_response(self, response: dict): - self.status = response['status'] + self.status = response["status"] def __repr__(self) -> str: return f"" - + def check_status(self): response = self._fetch_current_response(self.id) self._update_from_response(response) return self.status - + def download_results_as_csv(self, save_path: Optional[Text] = None, return_dataframe: bool = False): """Get the results of the benchmark job in a CSV format. The results can either be downloaded locally or returned in the form of pandas.DataFrame. @@ -73,7 +74,7 @@ def download_results_as_csv(self, save_path: Optional[Text] = None, return_dataf try: resp = self._fetch_current_response(self.id) logging.info(f"Downloading Benchmark Results: Status of downloading results for {self.id}: {resp}") - if "reportUrl" not in resp or resp['reportUrl'] == "": + if "reportUrl" not in resp or resp["reportUrl"] == "": logging.error( f"Downloading Benchmark Results: Can't get download results as they aren't generated yet. Please wait for a while." 
) @@ -92,9 +93,9 @@ def download_results_as_csv(self, save_path: Optional[Text] = None, return_dataf error_message = f"Downloading Benchmark Results: Error in Downloading Benchmark Results : {e}" logging.error(error_message, exc_info=True) raise Exception(error_message) - + def __simplify_scores(self, scores): - simplified_score_list = [] + simplified_score_list = [] for model_id, model_info in scores.items(): model_scores = model_info["rawScores"] # model = Mode @@ -104,9 +105,6 @@ def __simplify_scores(self, scores): simplified_score_list.append(row) return simplified_score_list - - - def get_scores(self, return_simplified=True, return_as_dataframe=True): try: resp = self._fetch_current_response(self.id) @@ -115,13 +113,13 @@ def get_scores(self, return_simplified=True, return_as_dataframe=True): for iteration_info in iterations: model_id = iteration_info["pipeline"] model_info = { - "creditsUsed" : round(iteration_info.get("credits", 0),5), - "timeSpent" : round(iteration_info.get("runtime", 0),2), - "status" : iteration_info["status"], - "rawScores" : iteration_info["scores"], + "creditsUsed": round(iteration_info.get("credits", 0), 5), + "timeSpent": round(iteration_info.get("runtime", 0), 2), + "status": iteration_info["status"], + "rawScores": iteration_info["scores"], } scores[model_id] = model_info - + if return_simplified: simplified_scores = self.__simplify_scores(scores) if return_as_dataframe: @@ -133,8 +131,7 @@ def get_scores(self, return_simplified=True, return_as_dataframe=True): error_message = f"Benchmark scores: Error in Getting benchmark scores: {e}" logging.error(error_message, exc_info=True) raise Exception(error_message) - - + def get_failuire_rate(self, return_as_dataframe=True): try: scores = self.get_scores(return_simplified=False) @@ -143,10 +140,10 @@ def get_failuire_rate(self, return_as_dataframe=True): if len(model_info["rawScores"]) == 0: failure_rates[model_id] = 0 continue - score_info = model_info["rawScores"][0] + score_info = model_info["rawScores"][0] num_succesful = score_info["count"] num_failed = score_info["failedSegmentsCount"] - failuire_rate = (num_failed * 100) / (num_succesful+num_failed) + failuire_rate = (num_failed * 100) / (num_succesful + num_failed) failure_rates[model_id] = failuire_rate if return_as_dataframe: df = pd.DataFrame() @@ -159,7 +156,7 @@ def get_failuire_rate(self, return_as_dataframe=True): error_message = f"Benchmark scores: Error in Getting benchmark failuire rate: {e}" logging.error(error_message, exc_info=True) raise Exception(error_message) - + def get_all_explanations(self): try: resp = self._fetch_current_response(self) @@ -173,7 +170,7 @@ def get_all_explanations(self): error_message = f"Benchmark scores: Error in Getting benchmark explanations: {e}" logging.error(error_message, exc_info=True) raise Exception(error_message) - + def get_localized_explanations(self, metric_dependant: bool, group_by_task: bool = False): try: raw_explanations = self.get_all_explanations() @@ -205,7 +202,7 @@ def get_localized_explanations(self, metric_dependant: bool, group_by_task: bool else: localized_explanations = raw_explanations["metricInDependent"] if len(localized_explanations) == 0: - localized_explanations = {} + localized_explanations = {} else: localized_explanations = localized_explanations[0] return localized_explanations @@ -213,4 +210,4 @@ def get_localized_explanations(self, metric_dependant: bool, group_by_task: bool except Exception as e: error_message = f"Benchmark scores: Error in Getting benchmark explanations: 
{e}" logging.error(error_message, exc_info=True) - raise Exception(error_message) \ No newline at end of file + raise Exception(error_message) diff --git a/aixplain/modules/finetune/status.py b/aixplain/modules/finetune/status.py index 4994ce55..5f27aa72 100644 --- a/aixplain/modules/finetune/status.py +++ b/aixplain/modules/finetune/status.py @@ -26,6 +26,7 @@ from dataclasses_json import dataclass_json from typing import Optional, Text + @dataclass_json @dataclass class FinetuneStatus(object): diff --git a/aixplain/modules/model/__init__.py b/aixplain/modules/model/__init__.py index 765960d4..41abf865 100644 --- a/aixplain/modules/model/__init__.py +++ b/aixplain/modules/model/__init__.py @@ -31,6 +31,8 @@ from aixplain.utils.file_utils import _request_with_retry from typing import Union, Optional, Text, Dict from datetime import datetime +from aixplain.modules.model.response import ModelResponse +from aixplain.enums import ModelStatus class Model(Asset): @@ -145,7 +147,7 @@ def sync_poll(self, poll_url: Text, name: Text = "model_process", wait_time: flo if wait_time < 60: wait_time *= 1.1 except Exception as e: - response_body = {"status": "FAILED", "completed": False, "error": "No response from the service."} + response_body = {"status": "FAILED", "completed": False, "error_message": "No response from the service."} logging.error(f"Polling for Model: polling for {name}: {e}") break if response_body["completed"] is True: @@ -157,7 +159,7 @@ def sync_poll(self, poll_url: Text, name: Text = "model_process", wait_time: flo ) return response_body - def poll(self, poll_url: Text, name: Text = "model_process") -> Dict: + def poll(self, poll_url: Text, name: Text = "model_process") -> ModelResponse: """Poll the platform to check whether an asynchronous call is done. Args: @@ -172,16 +174,31 @@ def poll(self, poll_url: Text, name: Text = "model_process") -> Dict: try: resp = r.json() if resp["completed"] is True: - resp["status"] = "SUCCESS" - if "error" in resp or "supplierError" in resp: - resp["status"] = "FAILED" + status = ModelStatus.SUCCESS + if "error_message" in resp or "supplierError" in resp: + status = ModelStatus.FAILED else: - resp["status"] = "IN_PROGRESS" + status = ModelStatus.IN_PROGRESS logging.debug(f"Single Poll for Model: Status of polling for {name}: {resp}") + return ModelResponse( + status=resp.pop("status", status), + data=resp.pop("data", ""), + details=resp.pop("details", {}), + completed=resp.pop("completed", False), + error_message=resp.pop("error_message", ""), + used_credits=resp.pop("usedCredits", 0), + run_time=resp.pop("runTime", 0), + usage=resp.pop("usage", None), + **resp, + ) except Exception as e: resp = {"status": "FAILED"} logging.error(f"Single Poll for Model: Error of polling for {name}: {e}") - return resp + return ModelResponse( + status=ModelStatus.FAILED, + error_message=str(e), + completed=False, + ) def run( self, @@ -190,7 +207,7 @@ def run( timeout: float = 300, parameters: Optional[Dict] = {}, wait_time: float = 0.5, - ) -> Dict: + ) -> ModelResponse: """Runs a model call. 
Args: @@ -212,15 +229,25 @@ def run( try: poll_url = response["url"] end = time.time() - response = self.sync_poll(poll_url, name=name, timeout=timeout, wait_time=wait_time) + return self.sync_poll(poll_url, name=name, timeout=timeout, wait_time=wait_time) except Exception as e: msg = f"Error in request for {name} - {traceback.format_exc()}" logging.error(f"Model Run: Error in running for {name}: {e}") end = time.time() - response = {"status": "FAILED", "error": msg, "elapsed_time": end - start} - return response - - def run_async(self, data: Union[Text, Dict], name: Text = "model_process", parameters: Optional[Dict] = {}) -> Dict: + response = {"status": "FAILED", "error": msg, "runTime": end - start} + return ModelResponse( + status=response.pop("status", ModelStatus.FAILED), + data=response.pop("data", ""), + details=response.pop("details", {}), + completed=response.pop("completed", False), + error_message=response.pop("error_message", ""), + used_credits=response.pop("usedCredits", 0), + run_time=response.pop("runTime", 0), + usage=response.pop("usage", None), + **response, + ) + + def run_async(self, data: Union[Text, Dict], name: Text = "model_process", parameters: Optional[Dict] = {}) -> ModelResponse: """Runs asynchronously a model call. Args: @@ -235,7 +262,15 @@ def run_async(self, data: Union[Text, Dict], name: Text = "model_process", param logging.debug(f"Model Run Async: Start service for {name} - {url}") payload = build_payload(data=data, parameters=parameters) response = call_run_endpoint(payload=payload, url=url, api_key=self.api_key) - return response + return ModelResponse( + status=response.pop("status", ModelStatus.FAILED), + data=response.pop("data", ""), + details=response.pop("details", {}), + completed=response.pop("completed", False), + error_message=response.pop("error_message", ""), + url=response.pop("url", None), + **response, + ) def check_finetune_status(self, after_epoch: Optional[int] = None): """Check the status of the FineTune model. diff --git a/aixplain/modules/model/llm_model.py b/aixplain/modules/model/llm_model.py index f48a3068..941c4a6f 100644 --- a/aixplain/modules/model/llm_model.py +++ b/aixplain/modules/model/llm_model.py @@ -28,6 +28,8 @@ from aixplain.modules.model.utils import build_payload, call_run_endpoint from aixplain.utils import config from typing import Union, Optional, List, Text, Dict +from aixplain.modules.model.response import ModelResponse +from aixplain.enums import ModelStatus class LLM(Model): @@ -104,7 +106,7 @@ def run( timeout: float = 300, parameters: Optional[Dict] = {}, wait_time: float = 0.5, - ) -> Dict: + ) -> ModelResponse: """Synchronously running a Large Language Model (LLM) model. 
Args: @@ -142,13 +144,24 @@ def run( try: poll_url = response["url"] end = time.time() - response = self.sync_poll(poll_url, name=name, timeout=timeout, wait_time=wait_time) + return self.sync_poll(poll_url, name=name, timeout=timeout, wait_time=wait_time) + except Exception as e: msg = f"Error in request for {name} - {traceback.format_exc()}" logging.error(f"Model Run: Error in running for {name}: {e}") end = time.time() response = {"status": "FAILED", "error": msg, "elapsed_time": end - start} - return response + return ModelResponse( + status=response.pop("status", ModelStatus.FAILED), + data=response.pop("data", ""), + details=response.pop("details", {}), + completed=response.pop("completed", False), + error_message=response.pop("error_message", ""), + used_credits=response.pop("usedCredits", 0), + run_time=response.pop("runTime", 0), + usage=response.pop("usage", None), + **response, + ) def run_async( self, @@ -161,7 +174,7 @@ def run_async( top_p: float = 1.0, name: Text = "model_process", parameters: Optional[Dict] = {}, - ) -> Dict: + ) -> ModelResponse: """Runs asynchronously a model call. Args: @@ -192,4 +205,12 @@ def run_async( ) payload = build_payload(data=data, parameters=parameters) response = call_run_endpoint(payload=payload, url=url, api_key=self.api_key) - return response + return ModelResponse( + status=response.pop("status", ModelStatus.FAILED), + data=response.pop("data", ""), + details=response.pop("details", {}), + completed=response.pop("completed", False), + error_message=response.pop("error_message", ""), + url=response.pop("url", None), + **response, + ) diff --git a/aixplain/modules/model/response.py b/aixplain/modules/model/response.py new file mode 100644 index 00000000..902b9987 --- /dev/null +++ b/aixplain/modules/model/response.py @@ -0,0 +1,70 @@ +from dataclasses import dataclass +from typing import Text, Any, Optional, Dict, List, Union +from aixplain.enums import ModelStatus + + +@dataclass +class ModelResponse: + """ModelResponse class to store the response of the model run.""" + + def __init__( + self, + status: ModelStatus, + data: Text = "", + details: Optional[Union[Dict, List]] = {}, + completed: bool = False, + error_message: Text = "", + used_credits: float = 0.0, + run_time: float = 0.0, + usage: Optional[Dict] = None, + url: Optional[Text] = None, + **kwargs, + ): + self.status = status + self.data = data + self.details = details + self.completed = completed + self.error_message = error_message + self.used_credits = used_credits + self.run_time = run_time + self.usage = usage + self.url = url + self.additional_fields = kwargs + + def __getitem__(self, key: Text) -> Any: + if key in self.__dict__: + return self.__dict__[key] + elif self.additional_fields and key in self.additional_fields: + return self.additional_fields[key] + elif key == "usedCredits": + return self.used_credits + elif key == "runTime": + return self.run_time + raise KeyError(f"Key '{key}' not found in ModelResponse.") + + def get(self, key: Text) -> Any: + return self[key] + + def __repr__(self) -> str: + fields = [] + if self.status: + fields.append(f"status={self.status}") + if self.data: + fields.append(f"data='{self.data}'") + if self.details: + fields.append(f"details={self.details}") + if self.completed: + fields.append(f"completed={self.completed}") + if self.error_message: + fields.append(f"error_message='{self.error_message}'") + if self.used_credits: + fields.append(f"used_credits={self.used_credits}") + if self.run_time: + 
fields.append(f"run_time={self.run_time}") + if self.usage: + fields.append(f"usage={self.usage}") + if self.url: + fields.append(f"url='{self.url}'") + if self.additional_fields: + fields.extend([f"{k}={repr(v)}" for k, v in self.additional_fields.items()]) + return f"ModelResponse({', '.join(fields)})" diff --git a/aixplain/modules/pipeline/default.py b/aixplain/modules/pipeline/default.py index b0499159..41ae3c71 100644 --- a/aixplain/modules/pipeline/default.py +++ b/aixplain/modules/pipeline/default.py @@ -3,7 +3,6 @@ class DefaultPipeline(PipelineAsset, DesignerPipeline): - def __init__(self, *args, **kwargs): PipelineAsset.__init__(self, *args, **kwargs) DesignerPipeline.__init__(self) diff --git a/aixplain/modules/pipeline/designer/__init__.py b/aixplain/modules/pipeline/designer/__init__.py index 81571f21..6a493aa4 100644 --- a/aixplain/modules/pipeline/designer/__init__.py +++ b/aixplain/modules/pipeline/designer/__init__.py @@ -10,7 +10,7 @@ BaseSegmentor, BaseMetric, BareAsset, - BareMetric + BareMetric, ) from .pipeline import DesignerPipeline from .base import ( @@ -68,5 +68,5 @@ "TI", "TO", "BaseMetric", - "BareMetric" + "BareMetric", ] diff --git a/aixplain/modules/pipeline/designer/base.py b/aixplain/modules/pipeline/designer/base.py index 49c68463..a925840f 100644 --- a/aixplain/modules/pipeline/designer/base.py +++ b/aixplain/modules/pipeline/designer/base.py @@ -207,9 +207,7 @@ def validate(self): # Should we check for data type mismatch? if from_param.data_type and to_param.data_type: if from_param.data_type != to_param.data_type: - raise ValueError( - f"Data type mismatch between {from_param.data_type} and {to_param.data_type}" # noqa - ) + raise ValueError(f"Data type mismatch between {from_param.data_type} and {to_param.data_type}") # noqa def attach_to(self, pipeline: "DesignerPipeline"): """ @@ -260,9 +258,7 @@ def add_param(self, param: Param) -> None: if not hasattr(self, param.code): setattr(self, param.code, param) - def _create_param( - self, code: str, data_type: DataType = None, value: any = None - ) -> Param: + def _create_param(self, code: str, data_type: DataType = None, value: any = None) -> Param: raise NotImplementedError() def create_param( @@ -326,9 +322,7 @@ def _create_param( class Outputs(ParamProxy): - def _create_param( - self, code: str, data_type: DataType = None, value: any = None - ) -> OutputParam: + def _create_param(self, code: str, data_type: DataType = None, value: any = None) -> OutputParam: return OutputParam(code=code, data_type=data_type, value=value) diff --git a/aixplain/modules/pipeline/designer/mixins.py b/aixplain/modules/pipeline/designer/mixins.py index e5aad3c4..44f653bf 100644 --- a/aixplain/modules/pipeline/designer/mixins.py +++ b/aixplain/modules/pipeline/designer/mixins.py @@ -53,9 +53,7 @@ def route(self, *params: Param) -> Node: """ assert self.pipeline, "Node not attached to a pipeline" - router = self.pipeline.router( - [(param.data_type, param.node) for param in params] - ) + router = self.pipeline.router([(param.data_type, param.node) for param in params]) self.outputs.input.link(router.inputs.input) for param in params: router.outputs.input.link(param) diff --git a/aixplain/modules/pipeline/designer/pipeline.py b/aixplain/modules/pipeline/designer/pipeline.py index d0522038..ece5ac0c 100644 --- a/aixplain/modules/pipeline/designer/pipeline.py +++ b/aixplain/modules/pipeline/designer/pipeline.py @@ -3,18 +3,7 @@ from aixplain.enums import DataType from .base import Serializable, Node, Link -from .nodes import ( - 
AssetNode, - Decision, - Script, - Input, - Output, - Router, - Route, - BareReconstructor, - BareSegmentor, - BareMetric -) +from .nodes import AssetNode, Decision, Script, Input, Output, Router, Route, BareReconstructor, BareSegmentor, BareMetric from .enums import NodeType, RouteType, Operation from .mixins import OutputableMixin @@ -121,10 +110,7 @@ def is_param_linked(self, node, param): :return: True if the param is linked, False otherwise """ for link in self.links: - if ( - link.to_node.number == node.number - and param.code == link.to_param - ): + if link.to_node.number == node.number and param.code == link.to_param: return True return False @@ -148,9 +134,7 @@ def validate_params(self): for node in self.nodes: for param in node.inputs: if param.is_required and not self.is_param_set(node, param): - raise ValueError( - f"Param {param.code} of node {node.label} is required" - ) + raise ValueError(f"Param {param.code} of node {node.label} is required") def validate(self): """ @@ -176,11 +160,7 @@ def get_link(self, from_node: int, to_node: int) -> Link: :return: the link """ return next( - ( - link - for link in self.links - if link.from_node == from_node and link.to_node == to_node - ), + (link for link in self.links if link.from_node == from_node and link.to_node == to_node), None, ) @@ -192,9 +172,7 @@ def get_node(self, node_number: int) -> Node: :param node_number: the node number :return: the node """ - return next( - (node for node in self.nodes if node.number == node_number), None - ) + return next((node for node in self.nodes if node.number == node_number), None) def auto_infer(self): """ @@ -228,9 +206,7 @@ def infer_data_type(node): infer_data_type(self) infer_data_type(to_node) - def asset( - self, asset_id: str, *args, asset_class: Type[T] = AssetNode, **kwargs - ) -> T: + def asset(self, asset_id: str, *args, asset_class: Type[T] = AssetNode, **kwargs) -> T: """ Shortcut to create an asset node for the current pipeline. 
All params will be passed as keyword arguments to the node diff --git a/aixplain/modules/pipeline/generate.py b/aixplain/modules/pipeline/generate.py index a64917c1..46c95482 100644 --- a/aixplain/modules/pipeline/generate.py +++ b/aixplain/modules/pipeline/generate.py @@ -148,12 +148,7 @@ def populate_specs(functions: list): for function in functions: # slugify function name by trimming some special chars and # transforming it to snake case - function_name = ( - function["id"] - .replace("-", "_") - .replace("(", "_") - .replace(")", "_") - ) + function_name = function["id"].replace("-", "_").replace("(", "_").replace(")", "_") base_class = "AssetNode" is_segmentor = function["id"] in SEGMENTOR_FUNCTIONS is_reconstructor = function["id"] in RECONSTRUCTOR_FUNCTIONS @@ -170,9 +165,7 @@ def populate_specs(functions: list): "is_reconstructor": function["id"] in RECONSTRUCTOR_FUNCTIONS, "function_name": function_name, "base_class": base_class, - "class_name": "".join( - [w.title() for w in function_name.split("_")] - ), + "class_name": "".join([w.title() for w in function_name.split("_")]), "description": function["metaData"]["description"], "input_type": function["metaData"]["InputType"], "output_type": function["metaData"]["OutputType"], @@ -209,9 +202,7 @@ def populate_specs(functions: list): data_types = populate_data_types(functions) specs = populate_specs(functions) - print( - f"Populating module with {len(data_types)} data types and {len(specs)} specs" - ) + print(f"Populating module with {len(data_types)} data types and {len(specs)} specs") env = Environment( loader=BaseLoader(), trim_blocks=True, diff --git a/aixplain/modules/pipeline/pipeline.py b/aixplain/modules/pipeline/pipeline.py index e5675e4b..bf67ff15 100644 --- a/aixplain/modules/pipeline/pipeline.py +++ b/aixplain/modules/pipeline/pipeline.py @@ -4,18 +4,7 @@ from typing import Union, Type from aixplain.enums import DataType -from .designer import ( - InputParam, - OutputParam, - Inputs, - Outputs, - TI, - TO, - AssetNode, - BaseReconstructor, - BaseSegmentor, - BaseMetric -) +from .designer import InputParam, OutputParam, Inputs, Outputs, TI, TO, AssetNode, BaseReconstructor, BaseSegmentor, BaseMetric from .default import DefaultPipeline from aixplain.modules import asset @@ -38,13 +27,14 @@ def __init__(self, node=None): class ObjectDetection(AssetNode[ObjectDetectionInputs, ObjectDetectionOutputs]): """ - Object Detection is a computer vision technology that identifies and locates -objects within an image, typically by drawing bounding boxes around the -detected objects and classifying them into predefined categories. + Object Detection is a computer vision technology that identifies and locates + objects within an image, typically by drawing bounding boxes around the + detected objects and classifying them into predefined categories. - InputType: video - OutputType: text + InputType: video + OutputType: text """ + function: str = "object-detection" input_type: str = DataType.VIDEO output_type: str = DataType.TEXT @@ -71,12 +61,13 @@ def __init__(self, node=None): class LanguageIdentification(AssetNode[LanguageIdentificationInputs, LanguageIdentificationOutputs]): """ - Language Identification is the process of automatically determining the -language in which a given piece of text is written. + Language Identification is the process of automatically determining the + language in which a given piece of text is written. 
- InputType: text - OutputType: text + InputType: text + OutputType: text """ + function: str = "language-identification" input_type: str = DataType.TEXT output_type: str = DataType.TEXT @@ -105,14 +96,15 @@ def __init__(self, node=None): class Ocr(AssetNode[OcrInputs, OcrOutputs]): """ - OCR, or Optical Character Recognition, is a technology that converts different -types of documents, such as scanned paper documents, PDFs, or images captured -by a digital camera, into editable and searchable data by recognizing and -extracting text from the images. + OCR, or Optical Character Recognition, is a technology that converts different + types of documents, such as scanned paper documents, PDFs, or images captured + by a digital camera, into editable and searchable data by recognizing and + extracting text from the images. - InputType: image - OutputType: text + InputType: image + OutputType: text """ + function: str = "ocr" input_type: str = DataType.IMAGE output_type: str = DataType.TEXT @@ -139,13 +131,14 @@ def __init__(self, node=None): class ScriptExecution(AssetNode[ScriptExecutionInputs, ScriptExecutionOutputs]): """ - Script Execution refers to the process of running a set of programmed -instructions or code within a computing environment, enabling the automated -performance of tasks, calculations, or operations as defined by the script. + Script Execution refers to the process of running a set of programmed + instructions or code within a computing environment, enabling the automated + performance of tasks, calculations, or operations as defined by the script. - InputType: text - OutputType: text + InputType: text + OutputType: text """ + function: str = "script-execution" input_type: str = DataType.TEXT output_type: str = DataType.TEXT @@ -174,13 +167,14 @@ def __init__(self, node=None): class ImageLabelDetection(AssetNode[ImageLabelDetectionInputs, ImageLabelDetectionOutputs]): """ - Image Label Detection is a function that automatically identifies and assigns -descriptive tags or labels to objects, scenes, or elements within an image, -enabling easier categorization, search, and analysis of visual content. + Image Label Detection is a function that automatically identifies and assigns + descriptive tags or labels to objects, scenes, or elements within an image, + enabling easier categorization, search, and analysis of visual content. - InputType: image - OutputType: label + InputType: image + OutputType: label """ + function: str = "image-label-detection" input_type: str = DataType.IMAGE output_type: str = DataType.LABEL @@ -207,14 +201,15 @@ def __init__(self, node=None): class ImageCaptioning(AssetNode[ImageCaptioningInputs, ImageCaptioningOutputs]): """ - Image Captioning is a process that involves generating a textual description of -an image, typically using machine learning models to analyze the visual content -and produce coherent and contextually relevant sentences that describe the -objects, actions, and scenes depicted in the image. + Image Captioning is a process that involves generating a textual description of + an image, typically using machine learning models to analyze the visual content + and produce coherent and contextually relevant sentences that describe the + objects, actions, and scenes depicted in the image. 
- InputType: image - OutputType: text + InputType: image + OutputType: text """ + function: str = "image-captioning" input_type: str = DataType.IMAGE output_type: str = DataType.TEXT @@ -241,12 +236,13 @@ def __init__(self, node=None): class AudioLanguageIdentification(AssetNode[AudioLanguageIdentificationInputs, AudioLanguageIdentificationOutputs]): """ - Audio Language Identification is a process that involves analyzing an audio -recording to determine the language being spoken. + Audio Language Identification is a process that involves analyzing an audio + recording to determine the language being spoken. - InputType: audio - OutputType: label + InputType: audio + OutputType: label """ + function: str = "audio-language-identification" input_type: str = DataType.AUDIO output_type: str = DataType.LABEL @@ -273,13 +269,14 @@ def __init__(self, node=None): class AsrAgeClassification(AssetNode[AsrAgeClassificationInputs, AsrAgeClassificationOutputs]): """ - The ASR Age Classification function is designed to analyze audio recordings of -speech to determine the speaker's age group by leveraging automatic speech -recognition (ASR) technology and machine learning algorithms. + The ASR Age Classification function is designed to analyze audio recordings of + speech to determine the speaker's age group by leveraging automatic speech + recognition (ASR) technology and machine learning algorithms. - InputType: audio - OutputType: label + InputType: audio + OutputType: label """ + function: str = "asr-age-classification" input_type: str = DataType.AUDIO output_type: str = DataType.LABEL @@ -310,13 +307,14 @@ def __init__(self, node=None): class BenchmarkScoringMt(AssetNode[BenchmarkScoringMtInputs, BenchmarkScoringMtOutputs]): """ - Benchmark Scoring MT is a function designed to evaluate and score machine -translation systems by comparing their output against a set of predefined -benchmarks, thereby assessing their accuracy and performance. + Benchmark Scoring MT is a function designed to evaluate and score machine + translation systems by comparing their output against a set of predefined + benchmarks, thereby assessing their accuracy and performance. - InputType: text - OutputType: label + InputType: text + OutputType: label """ + function: str = "benchmark-scoring-mt" input_type: str = DataType.TEXT output_type: str = DataType.LABEL @@ -343,12 +341,13 @@ def __init__(self, node=None): class AsrGenderClassification(AssetNode[AsrGenderClassificationInputs, AsrGenderClassificationOutputs]): """ - The ASR Gender Classification function analyzes audio recordings to determine -and classify the speaker's gender based on their voice characteristics. + The ASR Gender Classification function analyzes audio recordings to determine + and classify the speaker's gender based on their voice characteristics. - InputType: audio - OutputType: label + InputType: audio + OutputType: label """ + function: str = "asr-gender-classification" input_type: str = DataType.AUDIO output_type: str = DataType.LABEL @@ -377,13 +376,14 @@ def __init__(self, node=None): class BaseModel(AssetNode[BaseModelInputs, BaseModelOutputs]): """ - The Base-Model function serves as a foundational framework designed to provide -essential features and capabilities upon which more specialized or advanced -models can be built and customized. + The Base-Model function serves as a foundational framework designed to provide + essential features and capabilities upon which more specialized or advanced + models can be built and customized. 
- InputType: text - OutputType: text + InputType: text + OutputType: text """ + function: str = "base-model" input_type: str = DataType.TEXT output_type: str = DataType.TEXT @@ -410,12 +410,13 @@ def __init__(self, node=None): class LanguageIdentificationAudio(AssetNode[LanguageIdentificationAudioInputs, LanguageIdentificationAudioOutputs]): """ - The Language Identification Audio function analyzes audio input to determine -and identify the language being spoken. + The Language Identification Audio function analyzes audio input to determine + and identify the language being spoken. - InputType: audio - OutputType: label + InputType: audio + OutputType: label """ + function: str = "language-identification-audio" input_type: str = DataType.AUDIO output_type: str = DataType.LABEL @@ -442,14 +443,15 @@ def __init__(self, node=None): class Loglikelihood(AssetNode[LoglikelihoodInputs, LoglikelihoodOutputs]): """ - The Log Likelihood function measures the probability of observing the given -data under a specific statistical model by taking the natural logarithm of the -likelihood function, thereby transforming the product of probabilities into a -sum, which simplifies the process of optimization and parameter estimation. + The Log Likelihood function measures the probability of observing the given + data under a specific statistical model by taking the natural logarithm of the + likelihood function, thereby transforming the product of probabilities into a + sum, which simplifies the process of optimization and parameter estimation. - InputType: text - OutputType: number + InputType: text + OutputType: number """ + function: str = "loglikelihood" input_type: str = DataType.TEXT output_type: str = DataType.NUMBER @@ -478,13 +480,14 @@ def __init__(self, node=None): class VideoEmbedding(AssetNode[VideoEmbeddingInputs, VideoEmbeddingOutputs]): """ - Video Embedding is a process that transforms video content into a fixed- -dimensional vector representation, capturing essential features and patterns to -facilitate tasks such as retrieval, classification, and recommendation. + Video Embedding is a process that transforms video content into a fixed- + dimensional vector representation, capturing essential features and patterns to + facilitate tasks such as retrieval, classification, and recommendation. - InputType: video - OutputType: embedding + InputType: video + OutputType: embedding """ + function: str = "video-embedding" input_type: str = DataType.VIDEO output_type: str = DataType.EMBEDDING @@ -513,13 +516,14 @@ def __init__(self, node=None): class TextSegmenation(AssetNode[TextSegmenationInputs, TextSegmenationOutputs]): """ - Text Segmentation is the process of dividing a continuous text into meaningful -units, such as words, sentences, or topics, to facilitate easier analysis and -understanding. + Text Segmentation is the process of dividing a continuous text into meaningful + units, such as words, sentences, or topics, to facilitate easier analysis and + understanding. 
- InputType: text - OutputType: text + InputType: text + OutputType: text """ + function: str = "text-segmenation" input_type: str = DataType.TEXT output_type: str = DataType.TEXT @@ -548,14 +552,15 @@ def __init__(self, node=None): class ImageEmbedding(AssetNode[ImageEmbeddingInputs, ImageEmbeddingOutputs]): """ - Image Embedding is a process that transforms an image into a fixed-dimensional -vector representation, capturing its essential features and enabling efficient -comparison, retrieval, and analysis in various machine learning and computer -vision tasks. + Image Embedding is a process that transforms an image into a fixed-dimensional + vector representation, capturing its essential features and enabling efficient + comparison, retrieval, and analysis in various machine learning and computer + vision tasks. - InputType: image - OutputType: text + InputType: image + OutputType: text """ + function: str = "image-embedding" input_type: str = DataType.IMAGE output_type: str = DataType.TEXT @@ -584,13 +589,14 @@ def __init__(self, node=None): class ImageManipulation(AssetNode[ImageManipulationInputs, ImageManipulationOutputs]): """ - Image Manipulation refers to the process of altering or enhancing digital -images using various techniques and tools to achieve desired visual effects, -correct imperfections, or transform the image's appearance. + Image Manipulation refers to the process of altering or enhancing digital + images using various techniques and tools to achieve desired visual effects, + correct imperfections, or transform the image's appearance. - InputType: image - OutputType: image + InputType: image + OutputType: image """ + function: str = "image-manipulation" input_type: str = DataType.IMAGE output_type: str = DataType.IMAGE @@ -619,14 +625,15 @@ def __init__(self, node=None): class ImageToVideoGeneration(AssetNode[ImageToVideoGenerationInputs, ImageToVideoGenerationOutputs]): """ - The Image To Video Generation function transforms a series of static images -into a cohesive, dynamic video sequence, often incorporating transitions, -effects, and synchronization with audio to create a visually engaging -narrative. + The Image To Video Generation function transforms a series of static images + into a cohesive, dynamic video sequence, often incorporating transitions, + effects, and synchronization with audio to create a visually engaging + narrative. - InputType: image - OutputType: video + InputType: image + OutputType: video """ + function: str = "image-to-video-generation" input_type: str = DataType.IMAGE output_type: str = DataType.VIDEO @@ -663,13 +670,14 @@ def __init__(self, node=None): class AudioForcedAlignment(AssetNode[AudioForcedAlignmentInputs, AudioForcedAlignmentOutputs]): """ - Audio Forced Alignment is a process that synchronizes a given audio recording -with its corresponding transcript by precisely aligning each spoken word or -phoneme to its exact timing within the audio. + Audio Forced Alignment is a process that synchronizes a given audio recording + with its corresponding transcript by precisely aligning each spoken word or + phoneme to its exact timing within the audio. 
- InputType: audio - OutputType: audio + InputType: audio + OutputType: audio """ + function: str = "audio-forced-alignment" input_type: str = DataType.AUDIO output_type: str = DataType.AUDIO @@ -700,13 +708,14 @@ def __init__(self, node=None): class BenchmarkScoringAsr(AssetNode[BenchmarkScoringAsrInputs, BenchmarkScoringAsrOutputs]): """ - Benchmark Scoring ASR is a function that evaluates and compares the performance -of automatic speech recognition systems by analyzing their accuracy, speed, and -other relevant metrics against a standardized set of benchmarks. + Benchmark Scoring ASR is a function that evaluates and compares the performance + of automatic speech recognition systems by analyzing their accuracy, speed, and + other relevant metrics against a standardized set of benchmarks. - InputType: audio - OutputType: label + InputType: audio + OutputType: label """ + function: str = "benchmark-scoring-asr" input_type: str = DataType.AUDIO output_type: str = DataType.LABEL @@ -737,13 +746,14 @@ def __init__(self, node=None): class VisualQuestionAnswering(AssetNode[VisualQuestionAnsweringInputs, VisualQuestionAnsweringOutputs]): """ - Visual Question Answering (VQA) is a task in artificial intelligence that -involves analyzing an image and providing accurate, contextually relevant -answers to questions posed about the visual content of that image. + Visual Question Answering (VQA) is a task in artificial intelligence that + involves analyzing an image and providing accurate, contextually relevant + answers to questions posed about the visual content of that image. - InputType: image - OutputType: video + InputType: image + OutputType: video """ + function: str = "visual-question-answering" input_type: str = DataType.IMAGE output_type: str = DataType.VIDEO @@ -770,13 +780,14 @@ def __init__(self, node=None): class DocumentImageParsing(AssetNode[DocumentImageParsingInputs, DocumentImageParsingOutputs]): """ - Document Image Parsing is the process of analyzing and converting scanned or -photographed images of documents into structured, machine-readable formats by -identifying and extracting text, layout, and other relevant information. + Document Image Parsing is the process of analyzing and converting scanned or + photographed images of documents into structured, machine-readable formats by + identifying and extracting text, layout, and other relevant information. - InputType: image - OutputType: text + InputType: image + OutputType: text """ + function: str = "document-image-parsing" input_type: str = DataType.IMAGE output_type: str = DataType.TEXT @@ -803,14 +814,15 @@ def __init__(self, node=None): class DocumentInformationExtraction(AssetNode[DocumentInformationExtractionInputs, DocumentInformationExtractionOutputs]): """ - Document Information Extraction is the process of automatically identifying, -extracting, and structuring relevant data from unstructured or semi-structured -documents, such as invoices, receipts, contracts, and forms, to facilitate -easier data management and analysis. + Document Information Extraction is the process of automatically identifying, + extracting, and structuring relevant data from unstructured or semi-structured + documents, such as invoices, receipts, contracts, and forms, to facilitate + easier data management and analysis. 
- InputType: image - OutputType: text + InputType: image + OutputType: text """ + function: str = "document-information-extraction" input_type: str = DataType.IMAGE output_type: str = DataType.TEXT @@ -839,13 +851,14 @@ def __init__(self, node=None): class DepthEstimation(AssetNode[DepthEstimationInputs, DepthEstimationOutputs]): """ - Depth estimation is a computational process that determines the distance of -objects from a viewpoint, typically using visual data from cameras or sensors -to create a three-dimensional understanding of a scene. + Depth estimation is a computational process that determines the distance of + objects from a viewpoint, typically using visual data from cameras or sensors + to create a three-dimensional understanding of a scene. - InputType: image - OutputType: text + InputType: image + OutputType: text """ + function: str = "depth-estimation" input_type: str = DataType.IMAGE output_type: str = DataType.TEXT @@ -872,14 +885,15 @@ def __init__(self, node=None): class VideoGeneration(AssetNode[VideoGenerationInputs, VideoGenerationOutputs]): """ - Video Generation is the process of creating video content through automated or -semi-automated means, often utilizing algorithms, artificial intelligence, or -software tools to produce visual and audio elements that can range from simple -animations to complex, realistic scenes. + Video Generation is the process of creating video content through automated or + semi-automated means, often utilizing algorithms, artificial intelligence, or + software tools to produce visual and audio elements that can range from simple + animations to complex, realistic scenes. - InputType: text - OutputType: video + InputType: text + OutputType: video """ + function: str = "video-generation" input_type: str = DataType.TEXT output_type: str = DataType.VIDEO @@ -908,15 +922,18 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class ReferencelessAudioGenerationMetric(BaseMetric[ReferencelessAudioGenerationMetricInputs, ReferencelessAudioGenerationMetricOutputs]): +class ReferencelessAudioGenerationMetric( + BaseMetric[ReferencelessAudioGenerationMetricInputs, ReferencelessAudioGenerationMetricOutputs] +): """ - The Referenceless Audio Generation Metric is a tool designed to evaluate the -quality of generated audio content without the need for a reference or original -audio sample for comparison. + The Referenceless Audio Generation Metric is a tool designed to evaluate the + quality of generated audio content without the need for a reference or original + audio sample for comparison. - InputType: text - OutputType: text + InputType: text + OutputType: text """ + function: str = "referenceless-audio-generation-metric" input_type: str = DataType.TEXT output_type: str = DataType.TEXT @@ -943,13 +960,14 @@ def __init__(self, node=None): class MultiClassImageClassification(AssetNode[MultiClassImageClassificationInputs, MultiClassImageClassificationOutputs]): """ - Multi Class Image Classification is a machine learning task where an algorithm -is trained to categorize images into one of several predefined classes or -categories based on their visual content. + Multi Class Image Classification is a machine learning task where an algorithm + is trained to categorize images into one of several predefined classes or + categories based on their visual content. 
- InputType: image - OutputType: label + InputType: image + OutputType: label """ + function: str = "multi-class-image-classification" input_type: str = DataType.IMAGE output_type: str = DataType.LABEL @@ -976,13 +994,14 @@ def __init__(self, node=None): class SemanticSegmentation(AssetNode[SemanticSegmentationInputs, SemanticSegmentationOutputs]): """ - Semantic segmentation is a computer vision process that involves classifying -each pixel in an image into a predefined category, effectively partitioning the -image into meaningful segments based on the objects or regions they represent. + Semantic segmentation is a computer vision process that involves classifying + each pixel in an image into a predefined category, effectively partitioning the + image into meaningful segments based on the objects or regions they represent. - InputType: image - OutputType: label + InputType: image + OutputType: label """ + function: str = "semantic-segmentation" input_type: str = DataType.IMAGE output_type: str = DataType.LABEL @@ -1009,14 +1028,15 @@ def __init__(self, node=None): class InstanceSegmentation(AssetNode[InstanceSegmentationInputs, InstanceSegmentationOutputs]): """ - Instance segmentation is a computer vision task that involves detecting and -delineating each distinct object within an image, assigning a unique label and -precise boundary to every individual instance of objects, even if they belong -to the same category. + Instance segmentation is a computer vision task that involves detecting and + delineating each distinct object within an image, assigning a unique label and + precise boundary to every individual instance of objects, even if they belong + to the same category. - InputType: image - OutputType: label + InputType: image + OutputType: label """ + function: str = "instance-segmentation" input_type: str = DataType.IMAGE output_type: str = DataType.LABEL @@ -1043,14 +1063,15 @@ def __init__(self, node=None): class ImageColorization(AssetNode[ImageColorizationInputs, ImageColorizationOutputs]): """ - Image colorization is a process that involves adding color to grayscale images, -transforming them from black-and-white to full-color representations, often -using advanced algorithms and machine learning techniques to predict and apply -the appropriate hues and shades. + Image colorization is a process that involves adding color to grayscale images, + transforming them from black-and-white to full-color representations, often + using advanced algorithms and machine learning techniques to predict and apply + the appropriate hues and shades. - InputType: image - OutputType: image + InputType: image + OutputType: image """ + function: str = "image-colorization" input_type: str = DataType.IMAGE output_type: str = DataType.IMAGE @@ -1083,14 +1104,15 @@ def __init__(self, node=None): class AudioGenerationMetric(BaseMetric[AudioGenerationMetricInputs, AudioGenerationMetricOutputs]): """ - The Audio Generation Metric is a quantitative measure used to evaluate the -quality, accuracy, and overall performance of audio generated by artificial -intelligence systems, often considering factors such as fidelity, -intelligibility, and similarity to human-produced audio. + The Audio Generation Metric is a quantitative measure used to evaluate the + quality, accuracy, and overall performance of audio generated by artificial + intelligence systems, often considering factors such as fidelity, + intelligibility, and similarity to human-produced audio. 
- InputType: text - OutputType: text + InputType: text + OutputType: text """ + function: str = "audio-generation-metric" input_type: str = DataType.TEXT output_type: str = DataType.TEXT @@ -1117,14 +1139,15 @@ def __init__(self, node=None): class ImageImpainting(AssetNode[ImageImpaintingInputs, ImageImpaintingOutputs]): """ - Image inpainting is a process that involves filling in missing or damaged parts -of an image in a way that is visually coherent and seamlessly blends with the -surrounding areas, often using advanced algorithms and techniques to restore -the image to its original or intended appearance. + Image inpainting is a process that involves filling in missing or damaged parts + of an image in a way that is visually coherent and seamlessly blends with the + surrounding areas, often using advanced algorithms and techniques to restore + the image to its original or intended appearance. - InputType: image - OutputType: image + InputType: image + OutputType: image """ + function: str = "image-impainting" input_type: str = DataType.IMAGE output_type: str = DataType.IMAGE @@ -1151,14 +1174,15 @@ def __init__(self, node=None): class StyleTransfer(AssetNode[StyleTransferInputs, StyleTransferOutputs]): """ - Style Transfer is a technique in artificial intelligence that applies the -visual style of one image (such as the brushstrokes of a famous painting) to -the content of another image, effectively blending the artistic elements of the -first image with the subject matter of the second. + Style Transfer is a technique in artificial intelligence that applies the + visual style of one image (such as the brushstrokes of a famous painting) to + the content of another image, effectively blending the artistic elements of the + first image with the subject matter of the second. - InputType: image - OutputType: image + InputType: image + OutputType: image """ + function: str = "style-transfer" input_type: str = DataType.IMAGE output_type: str = DataType.IMAGE @@ -1187,13 +1211,14 @@ def __init__(self, node=None): class MultiClassTextClassification(AssetNode[MultiClassTextClassificationInputs, MultiClassTextClassificationOutputs]): """ - Multi Class Text Classification is a natural language processing task that -involves categorizing a given text into one of several predefined classes or -categories based on its content. + Multi Class Text Classification is a natural language processing task that + involves categorizing a given text into one of several predefined classes or + categories based on its content. - InputType: text - OutputType: label + InputType: text + OutputType: label """ + function: str = "multi-class-text-classification" input_type: str = DataType.TEXT output_type: str = DataType.LABEL @@ -1226,14 +1251,15 @@ def __init__(self, node=None): class TextEmbedding(AssetNode[TextEmbeddingInputs, TextEmbeddingOutputs]): """ - Text embedding is a process that converts text into numerical vectors, -capturing the semantic meaning and contextual relationships of words or -phrases, enabling machines to understand and analyze natural language more -effectively. + Text embedding is a process that converts text into numerical vectors, + capturing the semantic meaning and contextual relationships of words or + phrases, enabling machines to understand and analyze natural language more + effectively. 
- InputType: text - OutputType: text + InputType: text + OutputType: text """ + function: str = "text-embedding" input_type: str = DataType.TEXT output_type: str = DataType.TEXT @@ -1262,14 +1288,15 @@ def __init__(self, node=None): class MultiLabelTextClassification(AssetNode[MultiLabelTextClassificationInputs, MultiLabelTextClassificationOutputs]): """ - Multi Label Text Classification is a natural language processing task where a -given text is analyzed and assigned multiple relevant labels or categories from -a predefined set, allowing for the text to belong to more than one category -simultaneously. + Multi Label Text Classification is a natural language processing task where a + given text is analyzed and assigned multiple relevant labels or categories from + a predefined set, allowing for the text to belong to more than one category + simultaneously. - InputType: text - OutputType: label + InputType: text + OutputType: label """ + function: str = "multi-label-text-classification" input_type: str = DataType.TEXT output_type: str = DataType.LABEL @@ -1296,12 +1323,13 @@ def __init__(self, node=None): class TextReconstruction(BaseReconstructor[TextReconstructionInputs, TextReconstructionOutputs]): """ - Text Reconstruction is a process that involves piecing together fragmented or -incomplete text data to restore it to its original, coherent form. + Text Reconstruction is a process that involves piecing together fragmented or + incomplete text data to restore it to its original, coherent form. - InputType: text - OutputType: text + InputType: text + OutputType: text """ + function: str = "text-reconstruction" input_type: str = DataType.TEXT output_type: str = DataType.TEXT @@ -1330,13 +1358,14 @@ def __init__(self, node=None): class FactChecking(AssetNode[FactCheckingInputs, FactCheckingOutputs]): """ - Fact Checking is the process of verifying the accuracy and truthfulness of -information, statements, or claims by cross-referencing with reliable sources -and evidence. + Fact Checking is the process of verifying the accuracy and truthfulness of + information, statements, or claims by cross-referencing with reliable sources + and evidence. - InputType: text - OutputType: label + InputType: text + OutputType: label """ + function: str = "fact-checking" input_type: str = DataType.TEXT output_type: str = DataType.LABEL @@ -1369,13 +1398,14 @@ def __init__(self, node=None): class SpeechClassification(AssetNode[SpeechClassificationInputs, SpeechClassificationOutputs]): """ - Speech Classification is a process that involves analyzing and categorizing -spoken language into predefined categories or classes based on various features -such as tone, pitch, and linguistic content. + Speech Classification is a process that involves analyzing and categorizing + spoken language into predefined categories or classes based on various features + such as tone, pitch, and linguistic content. - InputType: audio - OutputType: label + InputType: audio + OutputType: label """ + function: str = "speech-classification" input_type: str = DataType.AUDIO output_type: str = DataType.LABEL @@ -1404,14 +1434,15 @@ def __init__(self, node=None): class IntentClassification(AssetNode[IntentClassificationInputs, IntentClassificationOutputs]): """ - Intent Classification is a natural language processing task that involves -analyzing and categorizing user text input to determine the underlying purpose -or goal behind the communication, such as booking a flight, asking for weather -information, or setting a reminder. 
+ Intent Classification is a natural language processing task that involves + analyzing and categorizing user text input to determine the underlying purpose + or goal behind the communication, such as booking a flight, asking for weather + information, or setting a reminder. - InputType: text - OutputType: label + InputType: text + OutputType: label """ + function: str = "intent-classification" input_type: str = DataType.TEXT output_type: str = DataType.LABEL @@ -1440,14 +1471,15 @@ def __init__(self, node=None): class PartOfSpeechTagging(AssetNode[PartOfSpeechTaggingInputs, PartOfSpeechTaggingOutputs]): """ - Part of Speech Tagging is a natural language processing task that involves -assigning each word in a sentence its corresponding part of speech, such as -noun, verb, adjective, or adverb, based on its role and context within the -sentence. + Part of Speech Tagging is a natural language processing task that involves + assigning each word in a sentence its corresponding part of speech, such as + noun, verb, adjective, or adverb, based on its role and context within the + sentence. - InputType: text - OutputType: label + InputType: text + OutputType: label """ + function: str = "part-of-speech-tagging" input_type: str = DataType.TEXT output_type: str = DataType.LABEL @@ -1474,14 +1506,15 @@ def __init__(self, node=None): class MetricAggregation(BaseMetric[MetricAggregationInputs, MetricAggregationOutputs]): """ - Metric Aggregation is a function that computes and summarizes numerical data by -applying statistical operations, such as averaging, summing, or finding the -minimum and maximum values, to provide insights and facilitate analysis of -large datasets. + Metric Aggregation is a function that computes and summarizes numerical data by + applying statistical operations, such as averaging, summing, or finding the + minimum and maximum values, to provide insights and facilitate analysis of + large datasets. - InputType: text - OutputType: text + InputType: text + OutputType: text """ + function: str = "metric-aggregation" input_type: str = DataType.TEXT output_type: str = DataType.TEXT @@ -1510,14 +1543,15 @@ def __init__(self, node=None): class DialectDetection(AssetNode[DialectDetectionInputs, DialectDetectionOutputs]): """ - Dialect Detection is a function that identifies and classifies the specific -regional or social variations of a language spoken or written by an individual, -enabling the recognition of distinct linguistic patterns and nuances associated -with different dialects. + Dialect Detection is a function that identifies and classifies the specific + regional or social variations of a language spoken or written by an individual, + enabling the recognition of distinct linguistic patterns and nuances associated + with different dialects. - InputType: audio - OutputType: text + InputType: audio + OutputType: text """ + function: str = "dialect-detection" input_type: str = DataType.AUDIO output_type: str = DataType.TEXT @@ -1544,13 +1578,14 @@ def __init__(self, node=None): class InverseTextNormalization(AssetNode[InverseTextNormalizationInputs, InverseTextNormalizationOutputs]): """ - Inverse Text Normalization is the process of converting spoken or written -language in its normalized form, such as numbers, dates, and abbreviations, -back into their original, more complex or detailed textual representations. 
+ Inverse Text Normalization is the process of converting spoken or written + language in its normalized form, such as numbers, dates, and abbreviations, + back into their original, more complex or detailed textual representations. - InputType: text - OutputType: label + InputType: text + OutputType: label """ + function: str = "inverse-text-normalization" input_type: str = DataType.TEXT output_type: str = DataType.LABEL @@ -1579,12 +1614,13 @@ def __init__(self, node=None): class TextToAudio(AssetNode[TextToAudioInputs, TextToAudioOutputs]): """ - The Text to Audio function converts written text into spoken words, allowing -users to listen to the content instead of reading it. + The Text to Audio function converts written text into spoken words, allowing + users to listen to the content instead of reading it. - InputType: text - OutputType: audio + InputType: text + OutputType: audio """ + function: str = "text-to-audio" input_type: str = DataType.TEXT output_type: str = DataType.AUDIO @@ -1617,13 +1653,14 @@ def __init__(self, node=None): class FillTextMask(AssetNode[FillTextMaskInputs, FillTextMaskOutputs]): """ - The "Fill Text Mask" function takes a text input with masked or placeholder -characters and replaces those placeholders with specified or contextually -appropriate characters to generate a complete and coherent text output. + The "Fill Text Mask" function takes a text input with masked or placeholder + characters and replaces those placeholders with specified or contextually + appropriate characters to generate a complete and coherent text output. - InputType: text - OutputType: text + InputType: text + OutputType: text """ + function: str = "fill-text-mask" input_type: str = DataType.TEXT output_type: str = DataType.TEXT @@ -1652,14 +1689,15 @@ def __init__(self, node=None): class VideoContentModeration(AssetNode[VideoContentModerationInputs, VideoContentModerationOutputs]): """ - Video Content Moderation is the process of reviewing, analyzing, and filtering -video content to ensure it adheres to community guidelines, legal standards, -and platform policies, thereby preventing the dissemination of inappropriate, -harmful, or illegal material. + Video Content Moderation is the process of reviewing, analyzing, and filtering + video content to ensure it adheres to community guidelines, legal standards, + and platform policies, thereby preventing the dissemination of inappropriate, + harmful, or illegal material. - InputType: video - OutputType: label + InputType: video + OutputType: label """ + function: str = "video-content-moderation" input_type: str = DataType.VIDEO output_type: str = DataType.LABEL @@ -1686,13 +1724,14 @@ def __init__(self, node=None): class ExtractAudioFromVideo(AssetNode[ExtractAudioFromVideoInputs, ExtractAudioFromVideoOutputs]): """ - The "Extract Audio From Video" function allows users to separate and save the -audio track from a video file, enabling them to obtain just the sound without -the accompanying visual content. + The "Extract Audio From Video" function allows users to separate and save the + audio track from a video file, enabling them to obtain just the sound without + the accompanying visual content. 
- InputType: video - OutputType: audio + InputType: video + OutputType: audio """ + function: str = "extract-audio-from-video" input_type: str = DataType.VIDEO output_type: str = DataType.AUDIO @@ -1721,13 +1760,14 @@ def __init__(self, node=None): class ImageCompression(AssetNode[ImageCompressionInputs, ImageCompressionOutputs]): """ - Image compression is a process that reduces the file size of an image by -removing redundant or non-essential data, while maintaining an acceptable level -of visual quality. + Image compression is a process that reduces the file size of an image by + removing redundant or non-essential data, while maintaining an acceptable level + of visual quality. - InputType: image - OutputType: image + InputType: image + OutputType: image """ + function: str = "image-compression" input_type: str = DataType.IMAGE output_type: str = DataType.IMAGE @@ -1756,13 +1796,14 @@ def __init__(self, node=None): class MultilingualSpeechRecognition(AssetNode[MultilingualSpeechRecognitionInputs, MultilingualSpeechRecognitionOutputs]): """ - Multilingual Speech Recognition is a technology that enables the automatic -transcription of spoken language into text across multiple languages, allowing -for seamless communication and understanding in diverse linguistic contexts. + Multilingual Speech Recognition is a technology that enables the automatic + transcription of spoken language into text across multiple languages, allowing + for seamless communication and understanding in diverse linguistic contexts. - InputType: audio - OutputType: text + InputType: audio + OutputType: text """ + function: str = "multilingual-speech-recognition" input_type: str = DataType.AUDIO output_type: str = DataType.TEXT @@ -1791,16 +1832,19 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class ReferencelessTextGenerationMetric(BaseMetric[ReferencelessTextGenerationMetricInputs, ReferencelessTextGenerationMetricOutputs]): +class ReferencelessTextGenerationMetric( + BaseMetric[ReferencelessTextGenerationMetricInputs, ReferencelessTextGenerationMetricOutputs] +): """ - The Referenceless Text Generation Metric is a method for evaluating the quality -of generated text without requiring a reference text for comparison, often -leveraging models or algorithms to assess coherence, relevance, and fluency -based on intrinsic properties of the text itself. + The Referenceless Text Generation Metric is a method for evaluating the quality + of generated text without requiring a reference text for comparison, often + leveraging models or algorithms to assess coherence, relevance, and fluency + based on intrinsic properties of the text itself. - InputType: text - OutputType: text + InputType: text + OutputType: text """ + function: str = "referenceless-text-generation-metric" input_type: str = DataType.TEXT output_type: str = DataType.TEXT @@ -1833,13 +1877,14 @@ def __init__(self, node=None): class TextGenerationMetricDefault(BaseMetric[TextGenerationMetricDefaultInputs, TextGenerationMetricDefaultOutputs]): """ - The "Text Generation Metric Default" function provides a standard set of -evaluation metrics for assessing the quality and performance of text generation -models. + The "Text Generation Metric Default" function provides a standard set of + evaluation metrics for assessing the quality and performance of text generation + models. 
- InputType: text - OutputType: text + InputType: text + OutputType: text """ + function: str = "text-generation-metric-default" input_type: str = DataType.TEXT output_type: str = DataType.TEXT @@ -1866,13 +1911,14 @@ def __init__(self, node=None): class NoiseRemoval(AssetNode[NoiseRemovalInputs, NoiseRemovalOutputs]): """ - Noise Removal is a process that involves identifying and eliminating unwanted -random variations or disturbances from an audio signal to enhance the clarity -and quality of the underlying information. + Noise Removal is a process that involves identifying and eliminating unwanted + random variations or disturbances from an audio signal to enhance the clarity + and quality of the underlying information. - InputType: audio - OutputType: audio + InputType: audio + OutputType: audio """ + function: str = "noise-removal" input_type: str = DataType.AUDIO output_type: str = DataType.AUDIO @@ -1899,13 +1945,14 @@ def __init__(self, node=None): class AudioReconstruction(BaseReconstructor[AudioReconstructionInputs, AudioReconstructionOutputs]): """ - Audio Reconstruction is the process of restoring or recreating audio signals -from incomplete, damaged, or degraded recordings to achieve a high-quality, -accurate representation of the original sound. + Audio Reconstruction is the process of restoring or recreating audio signals + from incomplete, damaged, or degraded recordings to achieve a high-quality, + accurate representation of the original sound. - InputType: audio - OutputType: audio + InputType: audio + OutputType: audio """ + function: str = "audio-reconstruction" input_type: str = DataType.AUDIO output_type: str = DataType.AUDIO @@ -1944,13 +1991,14 @@ def __init__(self, node=None): class VoiceCloning(AssetNode[VoiceCloningInputs, VoiceCloningOutputs]): """ - Voice cloning is a technology that uses artificial intelligence to create a -digital replica of a person's voice, allowing for the generation of speech that -mimics the tone, pitch, and speaking style of the original speaker. + Voice cloning is a technology that uses artificial intelligence to create a + digital replica of a person's voice, allowing for the generation of speech that + mimics the tone, pitch, and speaking style of the original speaker. - InputType: text - OutputType: audio + InputType: text + OutputType: audio """ + function: str = "voice-cloning" input_type: str = DataType.TEXT output_type: str = DataType.AUDIO @@ -1983,14 +2031,15 @@ def __init__(self, node=None): class Diacritization(AssetNode[DiacritizationInputs, DiacritizationOutputs]): """ - Diacritization is the process of adding diacritical marks to letters in a text -to indicate pronunciation, stress, tone, or meaning, often used in languages -such as Arabic, Hebrew, and Vietnamese to provide clarity and accuracy in -written communication. + Diacritization is the process of adding diacritical marks to letters in a text + to indicate pronunciation, stress, tone, or meaning, often used in languages + such as Arabic, Hebrew, and Vietnamese to provide clarity and accuracy in + written communication. 
- InputType: text - OutputType: text + InputType: text + OutputType: text """ + function: str = "diacritization" input_type: str = DataType.TEXT output_type: str = DataType.TEXT @@ -2017,13 +2066,14 @@ def __init__(self, node=None): class AudioEmotionDetection(AssetNode[AudioEmotionDetectionInputs, AudioEmotionDetectionOutputs]): """ - Audio Emotion Detection is a technology that analyzes vocal characteristics and -patterns in audio recordings to identify and classify the emotional state of -the speaker. + Audio Emotion Detection is a technology that analyzes vocal characteristics and + patterns in audio recordings to identify and classify the emotional state of + the speaker. - InputType: audio - OutputType: label + InputType: audio + OutputType: label """ + function: str = "audio-emotion-detection" input_type: str = DataType.AUDIO output_type: str = DataType.LABEL @@ -2056,13 +2106,14 @@ def __init__(self, node=None): class TextSummarization(AssetNode[TextSummarizationInputs, TextSummarizationOutputs]): """ - Text summarization is the process of condensing a large body of text into a -shorter version, capturing the main points and essential information while -maintaining coherence and meaning. + Text summarization is the process of condensing a large body of text into a + shorter version, capturing the main points and essential information while + maintaining coherence and meaning. - InputType: text - OutputType: text + InputType: text + OutputType: text """ + function: str = "text-summarization" input_type: str = DataType.TEXT output_type: str = DataType.TEXT @@ -2093,14 +2144,15 @@ def __init__(self, node=None): class EntityLinking(AssetNode[EntityLinkingInputs, EntityLinkingOutputs]): """ - Entity Linking is the process of identifying and connecting mentions of -entities within a text to their corresponding entries in a structured knowledge -base, thereby enabling the disambiguation of terms and enhancing the -understanding of the text's context. + Entity Linking is the process of identifying and connecting mentions of + entities within a text to their corresponding entries in a structured knowledge + base, thereby enabling the disambiguation of terms and enhancing the + understanding of the text's context. - InputType: text - OutputType: label + InputType: text + OutputType: label """ + function: str = "entity-linking" input_type: str = DataType.TEXT output_type: str = DataType.LABEL @@ -2133,14 +2185,15 @@ def __init__(self, node=None): class TextGenerationMetric(BaseMetric[TextGenerationMetricInputs, TextGenerationMetricOutputs]): """ - A Text Generation Metric is a quantitative measure used to evaluate the quality -and effectiveness of text produced by natural language processing models, often -assessing aspects such as coherence, relevance, fluency, and adherence to given -prompts or instructions. + A Text Generation Metric is a quantitative measure used to evaluate the quality + and effectiveness of text produced by natural language processing models, often + assessing aspects such as coherence, relevance, fluency, and adherence to given + prompts or instructions. 
- InputType: text - OutputType: text + InputType: text + OutputType: text """ + function: str = "text-generation-metric" input_type: str = DataType.TEXT output_type: str = DataType.TEXT @@ -2169,12 +2222,13 @@ def __init__(self, node=None): class SplitOnLinebreak(BaseSegmentor[SplitOnLinebreakInputs, SplitOnLinebreakOutputs]): """ - The "Split On Linebreak" function divides a given string into a list of -substrings, using linebreaks (newline characters) as the points of separation. + The "Split On Linebreak" function divides a given string into a list of + substrings, using linebreaks (newline characters) as the points of separation. - InputType: text - OutputType: text + InputType: text + OutputType: text """ + function: str = "split-on-linebreak" input_type: str = DataType.TEXT output_type: str = DataType.TEXT @@ -2207,14 +2261,15 @@ def __init__(self, node=None): class SentimentAnalysis(AssetNode[SentimentAnalysisInputs, SentimentAnalysisOutputs]): """ - Sentiment Analysis is a natural language processing technique used to determine -and classify the emotional tone or subjective information expressed in a piece -of text, such as identifying whether the sentiment is positive, negative, or -neutral. + Sentiment Analysis is a natural language processing technique used to determine + and classify the emotional tone or subjective information expressed in a piece + of text, such as identifying whether the sentiment is positive, negative, or + neutral. - InputType: text - OutputType: label + InputType: text + OutputType: label """ + function: str = "sentiment-analysis" input_type: str = DataType.TEXT output_type: str = DataType.LABEL @@ -2241,13 +2296,14 @@ def __init__(self, node=None): class KeywordSpotting(AssetNode[KeywordSpottingInputs, KeywordSpottingOutputs]): """ - Keyword Spotting is a function that enables the detection and identification of -specific words or phrases within a stream of audio, often used in voice- -activated systems to trigger actions or commands based on recognized keywords. + Keyword Spotting is a function that enables the detection and identification of + specific words or phrases within a stream of audio, often used in voice- + activated systems to trigger actions or commands based on recognized keywords. - InputType: audio - OutputType: label + InputType: audio + OutputType: label """ + function: str = "keyword-spotting" input_type: str = DataType.AUDIO output_type: str = DataType.LABEL @@ -2280,14 +2336,15 @@ def __init__(self, node=None): class TextClassification(AssetNode[TextClassificationInputs, TextClassificationOutputs]): """ - Text Classification is a natural language processing task that involves -categorizing text into predefined labels or classes based on its content, -enabling automated organization, filtering, and analysis of large volumes of -textual data. + Text Classification is a natural language processing task that involves + categorizing text into predefined labels or classes based on its content, + enabling automated organization, filtering, and analysis of large volumes of + textual data. 
- InputType: text - OutputType: label + InputType: text + OutputType: label """ + function: str = "text-classification" input_type: str = DataType.TEXT output_type: str = DataType.LABEL @@ -2316,14 +2373,15 @@ def __init__(self, node=None): class OtherMultipurpose(AssetNode[OtherMultipurposeInputs, OtherMultipurposeOutputs]): """ - The "Other (Multipurpose)" function serves as a versatile category designed to -accommodate a wide range of tasks and activities that do not fit neatly into -predefined classifications, offering flexibility and adaptability for various -needs. + The "Other (Multipurpose)" function serves as a versatile category designed to + accommodate a wide range of tasks and activities that do not fit neatly into + predefined classifications, offering flexibility and adaptability for various + needs. - InputType: text - OutputType: text + InputType: text + OutputType: text """ + function: str = "other-(multipurpose)" input_type: str = DataType.TEXT output_type: str = DataType.TEXT @@ -2362,13 +2420,14 @@ def __init__(self, node=None): class SpeechSynthesis(AssetNode[SpeechSynthesisInputs, SpeechSynthesisOutputs]): """ - Speech synthesis is the artificial production of human speech, typically -achieved through software or hardware systems that convert text into spoken -words, enabling machines to communicate verbally with users. + Speech synthesis is the artificial production of human speech, typically + achieved through software or hardware systems that convert text into spoken + words, enabling machines to communicate verbally with users. - InputType: text - OutputType: audio + InputType: text + OutputType: audio """ + function: str = "speech-synthesis" input_type: str = DataType.TEXT output_type: str = DataType.AUDIO @@ -2395,14 +2454,15 @@ def __init__(self, node=None): class AudioIntentDetection(AssetNode[AudioIntentDetectionInputs, AudioIntentDetectionOutputs]): """ - Audio Intent Detection is a process that involves analyzing audio signals to -identify and interpret the underlying intentions or purposes behind spoken -words, enabling systems to understand and respond appropriately to human -speech. + Audio Intent Detection is a process that involves analyzing audio signals to + identify and interpret the underlying intentions or purposes behind spoken + words, enabling systems to understand and respond appropriately to human + speech. - InputType: audio - OutputType: label + InputType: audio + OutputType: label """ + function: str = "audio-intent-detection" input_type: str = DataType.AUDIO output_type: str = DataType.LABEL @@ -2431,14 +2491,15 @@ def __init__(self, node=None): class VideoLabelDetection(AssetNode[VideoLabelDetectionInputs, VideoLabelDetectionOutputs]): """ - Video Label Detection is a function that automatically identifies and tags -various objects, scenes, activities, and other relevant elements within a -video, providing descriptive labels that enhance searchability and content -organization. + Video Label Detection is a function that automatically identifies and tags + various objects, scenes, activities, and other relevant elements within a + video, providing descriptive labels that enhance searchability and content + organization. 
- InputType: video - OutputType: label + InputType: video + OutputType: label """ + function: str = "video-label-detection" input_type: str = DataType.VIDEO output_type: str = DataType.LABEL @@ -2467,13 +2528,14 @@ def __init__(self, node=None): class AsrQualityEstimation(AssetNode[AsrQualityEstimationInputs, AsrQualityEstimationOutputs]): """ - ASR Quality Estimation is a process that evaluates the accuracy and reliability -of automatic speech recognition systems by analyzing their performance in -transcribing spoken language into text. + ASR Quality Estimation is a process that evaluates the accuracy and reliability + of automatic speech recognition systems by analyzing their performance in + transcribing spoken language into text. - InputType: text - OutputType: label + InputType: text + OutputType: label """ + function: str = "asr-quality-estimation" input_type: str = DataType.TEXT output_type: str = DataType.LABEL @@ -2508,14 +2570,15 @@ def __init__(self, node=None): class AudioTranscriptAnalysis(AssetNode[AudioTranscriptAnalysisInputs, AudioTranscriptAnalysisOutputs]): """ - Audio Transcript Analysis is a process that involves converting spoken language -from audio recordings into written text, followed by examining and interpreting -the transcribed content to extract meaningful insights, identify patterns, and -derive actionable information. + Audio Transcript Analysis is a process that involves converting spoken language + from audio recordings into written text, followed by examining and interpreting + the transcribed content to extract meaningful insights, identify patterns, and + derive actionable information. - InputType: audio - OutputType: text + InputType: audio + OutputType: text """ + function: str = "audio-transcript-analysis" input_type: str = DataType.AUDIO output_type: str = DataType.TEXT @@ -2542,13 +2605,14 @@ def __init__(self, node=None): class Search(AssetNode[SearchInputs, SearchOutputs]): """ - The "Search" function allows users to input keywords or phrases to quickly -locate specific information, files, or content within a database, website, or -application. + The "Search" function allows users to input keywords or phrases to quickly + locate specific information, files, or content within a database, website, or + application. - InputType: text - OutputType: text + InputType: text + OutputType: text """ + function: str = "search" input_type: str = DataType.TEXT output_type: str = DataType.TEXT @@ -2585,13 +2649,14 @@ def __init__(self, node=None): class VideoForcedAlignment(AssetNode[VideoForcedAlignmentInputs, VideoForcedAlignmentOutputs]): """ - Video Forced Alignment is a process that synchronizes video footage with -corresponding audio tracks by precisely aligning the visual and auditory -elements, ensuring that the movements of speakers' lips match the spoken words. + Video Forced Alignment is a process that synchronizes video footage with + corresponding audio tracks by precisely aligning the visual and auditory + elements, ensuring that the movements of speakers' lips match the spoken words. 
- InputType: video - OutputType: video + InputType: video + OutputType: video """ + function: str = "video-forced-alignment" input_type: str = DataType.VIDEO output_type: str = DataType.VIDEO @@ -2624,13 +2689,14 @@ def __init__(self, node=None): class VisemeGeneration(AssetNode[VisemeGenerationInputs, VisemeGenerationOutputs]): """ - Viseme Generation is the process of creating visual representations of -phonemes, which are the distinct units of sound in speech, to synchronize lip -movements with spoken words in animations or virtual avatars. + Viseme Generation is the process of creating visual representations of + phonemes, which are the distinct units of sound in speech, to synchronize lip + movements with spoken words in animations or virtual avatars. - InputType: text - OutputType: label + InputType: text + OutputType: label """ + function: str = "viseme-generation" input_type: str = DataType.TEXT output_type: str = DataType.LABEL @@ -2663,13 +2729,14 @@ def __init__(self, node=None): class TopicClassification(AssetNode[TopicClassificationInputs, TopicClassificationOutputs]): """ - Topic Classification is a natural language processing function that categorizes -text into predefined topics or subjects based on its content, enabling -efficient organization and retrieval of information. + Topic Classification is a natural language processing function that categorizes + text into predefined topics or subjects based on its content, enabling + efficient organization and retrieval of information. - InputType: text - OutputType: label + InputType: text + OutputType: label """ + function: str = "topic-classification" input_type: str = DataType.TEXT output_type: str = DataType.LABEL @@ -2702,13 +2769,14 @@ def __init__(self, node=None): class OffensiveLanguageIdentification(AssetNode[OffensiveLanguageIdentificationInputs, OffensiveLanguageIdentificationOutputs]): """ - Offensive Language Identification is a function that analyzes text to detect -and flag language that is abusive, harmful, or inappropriate, helping to -maintain a respectful and safe communication environment. + Offensive Language Identification is a function that analyzes text to detect + and flag language that is abusive, harmful, or inappropriate, helping to + maintain a respectful and safe communication environment. - InputType: text - OutputType: label + InputType: text + OutputType: label """ + function: str = "offensive-language-identification" input_type: str = DataType.TEXT output_type: str = DataType.LABEL @@ -2745,13 +2813,14 @@ def __init__(self, node=None): class SpeechTranslation(AssetNode[SpeechTranslationInputs, SpeechTranslationOutputs]): """ - Speech Translation is a technology that converts spoken language in real-time -from one language to another, enabling seamless communication between speakers -of different languages. + Speech Translation is a technology that converts spoken language in real-time + from one language to another, enabling seamless communication between speakers + of different languages. 
- InputType: audio - OutputType: text + InputType: audio + OutputType: text """ + function: str = "speech-translation" input_type: str = DataType.AUDIO output_type: str = DataType.TEXT @@ -2786,14 +2855,15 @@ def __init__(self, node=None): class SpeakerDiarizationAudio(BaseSegmentor[SpeakerDiarizationAudioInputs, SpeakerDiarizationAudioOutputs]): """ - Speaker Diarization Audio is a process that involves segmenting an audio -recording into distinct sections, each corresponding to a different speaker, in -order to identify and differentiate between multiple speakers within the same -audio stream. + Speaker Diarization Audio is a process that involves segmenting an audio + recording into distinct sections, each corresponding to a different speaker, in + order to identify and differentiate between multiple speakers within the same + audio stream. - InputType: audio - OutputType: label + InputType: audio + OutputType: label """ + function: str = "speaker-diarization-audio" input_type: str = DataType.AUDIO output_type: str = DataType.LABEL @@ -2830,14 +2900,15 @@ def __init__(self, node=None): class AudioTranscriptImprovement(AssetNode[AudioTranscriptImprovementInputs, AudioTranscriptImprovementOutputs]): """ - Audio Transcript Improvement is a function that enhances the accuracy and -clarity of transcribed audio recordings by correcting errors, refining -language, and ensuring the text faithfully represents the original spoken -content. + Audio Transcript Improvement is a function that enhances the accuracy and + clarity of transcribed audio recordings by correcting errors, refining + language, and ensuring the text faithfully represents the original spoken + content. - InputType: audio - OutputType: text + InputType: audio + OutputType: text """ + function: str = "audio-transcript-improvement" input_type: str = DataType.AUDIO output_type: str = DataType.TEXT @@ -2870,14 +2941,15 @@ def __init__(self, node=None): class SpeechNonSpeechClassification(AssetNode[SpeechNonSpeechClassificationInputs, SpeechNonSpeechClassificationOutputs]): """ - The function "Speech or Non-Speech Classification" is designed to analyze audio -input and determine whether the sound is human speech or non-speech noise, -enabling applications such as voice recognition systems to filter out -irrelevant background sounds. + The function "Speech or Non-Speech Classification" is designed to analyze audio + input and determine whether the sound is human speech or non-speech noise, + enabling applications such as voice recognition systems to filter out + irrelevant background sounds. - InputType: audio - OutputType: label + InputType: audio + OutputType: label """ + function: str = "speech-non-speech-classification" input_type: str = DataType.AUDIO output_type: str = DataType.LABEL @@ -2912,13 +2984,14 @@ def __init__(self, node=None): class TextDenormalization(AssetNode[TextDenormalizationInputs, TextDenormalizationOutputs]): """ - Text Denormalization is the process of converting abbreviated, contracted, or -otherwise simplified text into its full, standard form, often to improve -readability and ensure consistency in natural language processing tasks. + Text Denormalization is the process of converting abbreviated, contracted, or + otherwise simplified text into its full, standard form, often to improve + readability and ensure consistency in natural language processing tasks. 
- InputType: text - OutputType: label + InputType: text + OutputType: label """ + function: str = "text-denormalization" input_type: str = DataType.TEXT output_type: str = DataType.LABEL @@ -2947,13 +3020,14 @@ def __init__(self, node=None): class ImageContentModeration(AssetNode[ImageContentModerationInputs, ImageContentModerationOutputs]): """ - Image Content Moderation is a process that involves analyzing and filtering -images to detect and manage inappropriate, harmful, or sensitive content, -ensuring compliance with community guidelines and legal standards. + Image Content Moderation is a process that involves analyzing and filtering + images to detect and manage inappropriate, harmful, or sensitive content, + ensuring compliance with community guidelines and legal standards. - InputType: image - OutputType: label + InputType: image + OutputType: label """ + function: str = "image-content-moderation" input_type: str = DataType.IMAGE output_type: str = DataType.LABEL @@ -2982,15 +3056,18 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class ReferencelessTextGenerationMetricDefault(BaseMetric[ReferencelessTextGenerationMetricDefaultInputs, ReferencelessTextGenerationMetricDefaultOutputs]): +class ReferencelessTextGenerationMetricDefault( + BaseMetric[ReferencelessTextGenerationMetricDefaultInputs, ReferencelessTextGenerationMetricDefaultOutputs] +): """ - The Referenceless Text Generation Metric Default is a function designed to -evaluate the quality of generated text without relying on reference texts for -comparison. + The Referenceless Text Generation Metric Default is a function designed to + evaluate the quality of generated text without relying on reference texts for + comparison. - InputType: text - OutputType: text + InputType: text + OutputType: text """ + function: str = "referenceless-text-generation-metric-default" input_type: str = DataType.TEXT output_type: str = DataType.TEXT @@ -3025,14 +3102,15 @@ def __init__(self, node=None): class NamedEntityRecognition(AssetNode[NamedEntityRecognitionInputs, NamedEntityRecognitionOutputs]): """ - Named Entity Recognition (NER) is a natural language processing task that -involves identifying and classifying proper nouns in text into predefined -categories such as names of people, organizations, locations, dates, and other -entities. + Named Entity Recognition (NER) is a natural language processing task that + involves identifying and classifying proper nouns in text into predefined + categories such as names of people, organizations, locations, dates, and other + entities. - InputType: text - OutputType: label + InputType: text + OutputType: label """ + function: str = "named-entity-recognition" input_type: str = DataType.TEXT output_type: str = DataType.LABEL @@ -3065,14 +3143,15 @@ def __init__(self, node=None): class TextContentModeration(AssetNode[TextContentModerationInputs, TextContentModerationOutputs]): """ - Text Content Moderation is the process of reviewing, filtering, and managing -user-generated content to ensure it adheres to community guidelines, legal -standards, and platform policies, thereby maintaining a safe and respectful -online environment. + Text Content Moderation is the process of reviewing, filtering, and managing + user-generated content to ensure it adheres to community guidelines, legal + standards, and platform policies, thereby maintaining a safe and respectful + online environment. 
- InputType: text - OutputType: label + InputType: text + OutputType: label """ + function: str = "text-content-moderation" input_type: str = DataType.TEXT output_type: str = DataType.LABEL @@ -3105,13 +3184,14 @@ def __init__(self, node=None): class SpeakerDiarizationVideo(AssetNode[SpeakerDiarizationVideoInputs, SpeakerDiarizationVideoOutputs]): """ - The Speaker Diarization Video function identifies and segments different -speakers in a video, attributing portions of the audio to individual speakers -to facilitate analysis and understanding of multi-speaker conversations. + The Speaker Diarization Video function identifies and segments different + speakers in a video, attributing portions of the audio to individual speakers + to facilitate analysis and understanding of multi-speaker conversations. - InputType: video - OutputType: label + InputType: video + OutputType: label """ + function: str = "speaker-diarization-video" input_type: str = DataType.VIDEO output_type: str = DataType.LABEL @@ -3138,13 +3218,14 @@ def __init__(self, node=None): class SplitOnSilence(AssetNode[SplitOnSilenceInputs, SplitOnSilenceOutputs]): """ - The "Split On Silence" function divides an audio recording into separate -segments based on periods of silence, allowing for easier editing and analysis -of individual sections. + The "Split On Silence" function divides an audio recording into separate + segments based on periods of silence, allowing for easier editing and analysis + of individual sections. - InputType: audio - OutputType: audio + InputType: audio + OutputType: audio """ + function: str = "split-on-silence" input_type: str = DataType.AUDIO output_type: str = DataType.AUDIO @@ -3177,13 +3258,14 @@ def __init__(self, node=None): class EmotionDetection(AssetNode[EmotionDetectionInputs, EmotionDetectionOutputs]): """ - Emotion Detection is a process that involves analyzing text to identify and -categorize the emotional states or sentiments expressed by individuals, such as -happiness, sadness, anger, or fear. + Emotion Detection is a process that involves analyzing text to identify and + categorize the emotional states or sentiments expressed by individuals, such as + happiness, sadness, anger, or fear. - InputType: text - OutputType: label + InputType: text + OutputType: label """ + function: str = "emotion-detection" input_type: str = DataType.TEXT output_type: str = DataType.LABEL @@ -3216,14 +3298,15 @@ def __init__(self, node=None): class TextSpamDetection(AssetNode[TextSpamDetectionInputs, TextSpamDetectionOutputs]): """ - Text Spam Detection is a process that involves analyzing and identifying -unsolicited or irrelevant messages within text communications, typically using -algorithms and machine learning techniques to filter out spam and ensure the -integrity of the communication platform. + Text Spam Detection is a process that involves analyzing and identifying + unsolicited or irrelevant messages within text communications, typically using + algorithms and machine learning techniques to filter out spam and ensure the + integrity of the communication platform. 
- InputType: text - OutputType: label + InputType: text + OutputType: label """ + function: str = "text-spam-detection" input_type: str = DataType.TEXT output_type: str = DataType.LABEL @@ -3264,13 +3347,14 @@ def __init__(self, node=None): class Translation(AssetNode[TranslationInputs, TranslationOutputs]): """ - Translation is the process of converting text from one language into an -equivalent text in another language, preserving the original meaning and -context. + Translation is the process of converting text from one language into an + equivalent text in another language, preserving the original meaning and + context. - InputType: text - OutputType: text + InputType: text + OutputType: text """ + function: str = "translation" input_type: str = DataType.TEXT output_type: str = DataType.TEXT @@ -3307,13 +3391,14 @@ def __init__(self, node=None): class VoiceActivityDetection(BaseSegmentor[VoiceActivityDetectionInputs, VoiceActivityDetectionOutputs]): """ - Voice Activity Detection (VAD) is a technology that identifies the presence or -absence of human speech within an audio signal, enabling systems to distinguish -between spoken words and background noise. + Voice Activity Detection (VAD) is a technology that identifies the presence or + absence of human speech within an audio signal, enabling systems to distinguish + between spoken words and background noise. - InputType: audio - OutputType: audio + InputType: audio + OutputType: audio """ + function: str = "voice-activity-detection" input_type: str = DataType.AUDIO output_type: str = DataType.AUDIO @@ -3346,14 +3431,15 @@ def __init__(self, node=None): class SpeechEmbedding(AssetNode[SpeechEmbeddingInputs, SpeechEmbeddingOutputs]): """ - Speech Embedding is a process that transforms spoken language into a fixed- -dimensional vector representation, capturing essential features and -characteristics of the speech for tasks such as recognition, classification, -and analysis. + Speech Embedding is a process that transforms spoken language into a fixed- + dimensional vector representation, capturing essential features and + characteristics of the speech for tasks such as recognition, classification, + and analysis. - InputType: audio - OutputType: text + InputType: audio + OutputType: text """ + function: str = "speech-embedding" input_type: str = DataType.AUDIO output_type: str = DataType.TEXT @@ -3388,13 +3474,14 @@ def __init__(self, node=None): class SubtitlingTranslation(AssetNode[SubtitlingTranslationInputs, SubtitlingTranslationOutputs]): """ - Subtitling Translation is the process of converting spoken dialogue from one -language into written text in another language, which is then displayed on- -screen to aid viewers in understanding the content. + Subtitling Translation is the process of converting spoken dialogue from one + language into written text in another language, which is then displayed on- + screen to aid viewers in understanding the content. - InputType: text - OutputType: text + InputType: text + OutputType: text """ + function: str = "subtitling-translation" input_type: str = DataType.TEXT output_type: str = DataType.TEXT @@ -3429,13 +3516,14 @@ def __init__(self, node=None): class TextGeneration(AssetNode[TextGenerationInputs, TextGenerationOutputs]): """ - Text Generation is a process in which artificial intelligence models, such as -neural networks, produce coherent and contextually relevant text based on a -given input or prompt, often mimicking human writing styles and patterns. 
+ Text Generation is a process in which artificial intelligence models, such as + neural networks, produce coherent and contextually relevant text based on a + given input or prompt, often mimicking human writing styles and patterns. - InputType: text - OutputType: text + InputType: text + OutputType: text """ + function: str = "text-generation" input_type: str = DataType.TEXT output_type: str = DataType.TEXT @@ -3470,13 +3558,14 @@ def __init__(self, node=None): class VideoUnderstanding(AssetNode[VideoUnderstandingInputs, VideoUnderstandingOutputs]): """ - Video Understanding is the process of analyzing and interpreting video content -to extract meaningful information, such as identifying objects, actions, -events, and contextual relationships within the footage. + Video Understanding is the process of analyzing and interpreting video content + to extract meaningful information, such as identifying objects, actions, + events, and contextual relationships within the footage. - InputType: video - OutputType: text + InputType: video + OutputType: text """ + function: str = "video-understanding" input_type: str = DataType.VIDEO output_type: str = DataType.TEXT @@ -3505,13 +3594,14 @@ def __init__(self, node=None): class TextToVideoGeneration(AssetNode[TextToVideoGenerationInputs, TextToVideoGenerationOutputs]): """ - Text To Video Generation is a process that converts written descriptions or -scripts into dynamic, visual video content using advanced algorithms and -artificial intelligence. + Text To Video Generation is a process that converts written descriptions or + scripts into dynamic, visual video content using advanced algorithms and + artificial intelligence. - InputType: text - OutputType: video + InputType: text + OutputType: video """ + function: str = "text-to-video-generation" input_type: str = DataType.TEXT output_type: str = DataType.VIDEO @@ -3542,14 +3632,15 @@ def __init__(self, node=None): class TextNormalization(AssetNode[TextNormalizationInputs, TextNormalizationOutputs]): """ - Text normalization is the process of transforming text into a standard, -consistent format by correcting spelling errors, converting all characters to a -uniform case, removing punctuation, and expanding abbreviations to improve the -text's readability and usability for further processing or analysis. + Text normalization is the process of transforming text into a standard, + consistent format by correcting spelling errors, converting all characters to a + uniform case, removing punctuation, and expanding abbreviations to improve the + text's readability and usability for further processing or analysis. - InputType: text - OutputType: label + InputType: text + OutputType: label """ + function: str = "text-normalization" input_type: str = DataType.TEXT output_type: str = DataType.LABEL @@ -3584,12 +3675,13 @@ def __init__(self, node=None): class SpeechRecognition(AssetNode[SpeechRecognitionInputs, SpeechRecognitionOutputs]): """ - Speech recognition is a technology that enables a computer or device to -identify and process spoken language, converting it into text. + Speech recognition is a technology that enables a computer or device to + identify and process spoken language, converting it into text. 
- InputType: audio - OutputType: text + InputType: audio + OutputType: text """ + function: str = "speech-recognition" input_type: str = DataType.AUDIO output_type: str = DataType.TEXT @@ -3626,14 +3718,15 @@ def __init__(self, node=None): class Subtitling(AssetNode[SubtitlingInputs, SubtitlingOutputs]): """ - Subtitling is the process of displaying written text on a screen to represent -the spoken dialogue, narration, or other audio elements in a video, typically -to aid viewers who are deaf or hard of hearing, or to provide translations for -audiences who speak different languages. + Subtitling is the process of displaying written text on a screen to represent + the spoken dialogue, narration, or other audio elements in a video, typically + to aid viewers who are deaf or hard of hearing, or to provide translations for + audiences who speak different languages. - InputType: audio - OutputType: text + InputType: audio + OutputType: text """ + function: str = "subtitling" input_type: str = DataType.AUDIO output_type: str = DataType.TEXT @@ -3668,12 +3761,13 @@ def __init__(self, node=None): class ClassificationMetric(BaseMetric[ClassificationMetricInputs, ClassificationMetricOutputs]): """ - A Classification Metric is a quantitative measure used to evaluate the quality -and effectiveness of classification models. + A Classification Metric is a quantitative measure used to evaluate the quality + and effectiveness of classification models. - InputType: text - OutputType: text + InputType: text + OutputType: text """ + function: str = "classification-metric" input_type: str = DataType.TEXT output_type: str = DataType.TEXT @@ -3700,13 +3794,14 @@ def __init__(self, node=None): class TextToImageGeneration(AssetNode[TextToImageGenerationInputs, TextToImageGenerationOutputs]): """ - Text To Image Generation is a process where a system creates visual images -based on descriptive text input, translating written language into -corresponding graphical representations. + Text To Image Generation is a process where a system creates visual images + based on descriptive text input, translating written language into + corresponding graphical representations. - InputType: text - OutputType: image + InputType: text + OutputType: image """ + function: str = "text-to-image-generation" input_type: str = DataType.TEXT output_type: str = DataType.IMAGE @@ -3715,835 +3810,852 @@ class TextToImageGeneration(AssetNode[TextToImageGenerationInputs, TextToImageGe outputs_class: Type[TO] = TextToImageGenerationOutputs - class Pipeline(DefaultPipeline): - def object_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ObjectDetection: """ - Object Detection is a computer vision technology that identifies and locates -objects within an image, typically by drawing bounding boxes around the -detected objects and classifying them into predefined categories. + Object Detection is a computer vision technology that identifies and locates + objects within an image, typically by drawing bounding boxes around the + detected objects and classifying them into predefined categories. """ return ObjectDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) def language_identification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> LanguageIdentification: """ - Language Identification is the process of automatically determining the -language in which a given piece of text is written. 
+ Language Identification is the process of automatically determining the + language in which a given piece of text is written. """ return LanguageIdentification(*args, asset_id=asset_id, pipeline=self, **kwargs) def ocr(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Ocr: """ - OCR, or Optical Character Recognition, is a technology that converts different -types of documents, such as scanned paper documents, PDFs, or images captured -by a digital camera, into editable and searchable data by recognizing and -extracting text from the images. + OCR, or Optical Character Recognition, is a technology that converts different + types of documents, such as scanned paper documents, PDFs, or images captured + by a digital camera, into editable and searchable data by recognizing and + extracting text from the images. """ return Ocr(*args, asset_id=asset_id, pipeline=self, **kwargs) def script_execution(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ScriptExecution: """ - Script Execution refers to the process of running a set of programmed -instructions or code within a computing environment, enabling the automated -performance of tasks, calculations, or operations as defined by the script. + Script Execution refers to the process of running a set of programmed + instructions or code within a computing environment, enabling the automated + performance of tasks, calculations, or operations as defined by the script. """ return ScriptExecution(*args, asset_id=asset_id, pipeline=self, **kwargs) def image_label_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageLabelDetection: """ - Image Label Detection is a function that automatically identifies and assigns -descriptive tags or labels to objects, scenes, or elements within an image, -enabling easier categorization, search, and analysis of visual content. + Image Label Detection is a function that automatically identifies and assigns + descriptive tags or labels to objects, scenes, or elements within an image, + enabling easier categorization, search, and analysis of visual content. """ return ImageLabelDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) def image_captioning(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageCaptioning: """ - Image Captioning is a process that involves generating a textual description of -an image, typically using machine learning models to analyze the visual content -and produce coherent and contextually relevant sentences that describe the -objects, actions, and scenes depicted in the image. + Image Captioning is a process that involves generating a textual description of + an image, typically using machine learning models to analyze the visual content + and produce coherent and contextually relevant sentences that describe the + objects, actions, and scenes depicted in the image. """ return ImageCaptioning(*args, asset_id=asset_id, pipeline=self, **kwargs) def audio_language_identification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioLanguageIdentification: """ - Audio Language Identification is a process that involves analyzing an audio -recording to determine the language being spoken. + Audio Language Identification is a process that involves analyzing an audio + recording to determine the language being spoken. 
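Each helper in this Pipeline class is a thin factory: as its return statement shows, it instantiates the matching node class with pipeline=self so the node is registered on this pipeline. A minimal sketch (the model ID is a placeholder, and constructing Pipeline with no arguments is an assumption):

    # Placeholder model ID; any audio-language-identification asset would do.
    pipeline = Pipeline()
    lid = pipeline.audio_language_identification(asset_id="<model-id>")
    # Equivalent to constructing the node directly:
    # lid = AudioLanguageIdentification(asset_id="<model-id>", pipeline=pipeline)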
""" return AudioLanguageIdentification(*args, asset_id=asset_id, pipeline=self, **kwargs) def asr_age_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AsrAgeClassification: """ - The ASR Age Classification function is designed to analyze audio recordings of -speech to determine the speaker's age group by leveraging automatic speech -recognition (ASR) technology and machine learning algorithms. + The ASR Age Classification function is designed to analyze audio recordings of + speech to determine the speaker's age group by leveraging automatic speech + recognition (ASR) technology and machine learning algorithms. """ return AsrAgeClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) def benchmark_scoring_mt(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> BenchmarkScoringMt: """ - Benchmark Scoring MT is a function designed to evaluate and score machine -translation systems by comparing their output against a set of predefined -benchmarks, thereby assessing their accuracy and performance. + Benchmark Scoring MT is a function designed to evaluate and score machine + translation systems by comparing their output against a set of predefined + benchmarks, thereby assessing their accuracy and performance. """ return BenchmarkScoringMt(*args, asset_id=asset_id, pipeline=self, **kwargs) def asr_gender_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AsrGenderClassification: """ - The ASR Gender Classification function analyzes audio recordings to determine -and classify the speaker's gender based on their voice characteristics. + The ASR Gender Classification function analyzes audio recordings to determine + and classify the speaker's gender based on their voice characteristics. """ return AsrGenderClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) def base_model(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> BaseModel: """ - The Base-Model function serves as a foundational framework designed to provide -essential features and capabilities upon which more specialized or advanced -models can be built and customized. + The Base-Model function serves as a foundational framework designed to provide + essential features and capabilities upon which more specialized or advanced + models can be built and customized. """ return BaseModel(*args, asset_id=asset_id, pipeline=self, **kwargs) def language_identification_audio(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> LanguageIdentificationAudio: """ - The Language Identification Audio function analyzes audio input to determine -and identify the language being spoken. + The Language Identification Audio function analyzes audio input to determine + and identify the language being spoken. """ return LanguageIdentificationAudio(*args, asset_id=asset_id, pipeline=self, **kwargs) def loglikelihood(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Loglikelihood: """ - The Log Likelihood function measures the probability of observing the given -data under a specific statistical model by taking the natural logarithm of the -likelihood function, thereby transforming the product of probabilities into a -sum, which simplifies the process of optimization and parameter estimation. 
+ The Log Likelihood function measures the probability of observing the given + data under a specific statistical model by taking the natural logarithm of the + likelihood function, thereby transforming the product of probabilities into a + sum, which simplifies the process of optimization and parameter estimation. """ return Loglikelihood(*args, asset_id=asset_id, pipeline=self, **kwargs) def video_embedding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoEmbedding: """ - Video Embedding is a process that transforms video content into a fixed- -dimensional vector representation, capturing essential features and patterns to -facilitate tasks such as retrieval, classification, and recommendation. + Video Embedding is a process that transforms video content into a fixed- + dimensional vector representation, capturing essential features and patterns to + facilitate tasks such as retrieval, classification, and recommendation. """ return VideoEmbedding(*args, asset_id=asset_id, pipeline=self, **kwargs) def text_segmenation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextSegmenation: """ - Text Segmentation is the process of dividing a continuous text into meaningful -units, such as words, sentences, or topics, to facilitate easier analysis and -understanding. + Text Segmentation is the process of dividing a continuous text into meaningful + units, such as words, sentences, or topics, to facilitate easier analysis and + understanding. """ return TextSegmenation(*args, asset_id=asset_id, pipeline=self, **kwargs) def image_embedding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageEmbedding: """ - Image Embedding is a process that transforms an image into a fixed-dimensional -vector representation, capturing its essential features and enabling efficient -comparison, retrieval, and analysis in various machine learning and computer -vision tasks. + Image Embedding is a process that transforms an image into a fixed-dimensional + vector representation, capturing its essential features and enabling efficient + comparison, retrieval, and analysis in various machine learning and computer + vision tasks. """ return ImageEmbedding(*args, asset_id=asset_id, pipeline=self, **kwargs) def image_manipulation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageManipulation: """ - Image Manipulation refers to the process of altering or enhancing digital -images using various techniques and tools to achieve desired visual effects, -correct imperfections, or transform the image's appearance. + Image Manipulation refers to the process of altering or enhancing digital + images using various techniques and tools to achieve desired visual effects, + correct imperfections, or transform the image's appearance. """ return ImageManipulation(*args, asset_id=asset_id, pipeline=self, **kwargs) def image_to_video_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageToVideoGeneration: """ - The Image To Video Generation function transforms a series of static images -into a cohesive, dynamic video sequence, often incorporating transitions, -effects, and synchronization with audio to create a visually engaging -narrative. + The Image To Video Generation function transforms a series of static images + into a cohesive, dynamic video sequence, often incorporating transitions, + effects, and synchronization with audio to create a visually engaging + narrative. 
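Since every node class pins input_type and output_type as class attributes (DataType.AUDIO, DataType.TEXT, DataType.VIDEO, and so on), a coarse compatibility check is possible before wiring nodes together. A sketch assuming only those attributes; it ignores parameter-level wiring:

    def can_feed(upstream, downstream) -> bool:
        # An upstream node can feed a downstream one when the medium it
        # emits matches the medium the downstream node expects.
        return upstream.output_type == downstream.input_type

    asr = pipeline.speech_recognition(asset_id="<asr-model-id>")        # audio -> text
    t2v = pipeline.text_to_video_generation(asset_id="<t2v-model-id>")  # text -> video
    assert can_feed(asr, t2v)  # DataType.TEXT on both sides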
""" return ImageToVideoGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) def audio_forced_alignment(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioForcedAlignment: """ - Audio Forced Alignment is a process that synchronizes a given audio recording -with its corresponding transcript by precisely aligning each spoken word or -phoneme to its exact timing within the audio. + Audio Forced Alignment is a process that synchronizes a given audio recording + with its corresponding transcript by precisely aligning each spoken word or + phoneme to its exact timing within the audio. """ return AudioForcedAlignment(*args, asset_id=asset_id, pipeline=self, **kwargs) def benchmark_scoring_asr(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> BenchmarkScoringAsr: """ - Benchmark Scoring ASR is a function that evaluates and compares the performance -of automatic speech recognition systems by analyzing their accuracy, speed, and -other relevant metrics against a standardized set of benchmarks. + Benchmark Scoring ASR is a function that evaluates and compares the performance + of automatic speech recognition systems by analyzing their accuracy, speed, and + other relevant metrics against a standardized set of benchmarks. """ return BenchmarkScoringAsr(*args, asset_id=asset_id, pipeline=self, **kwargs) def visual_question_answering(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VisualQuestionAnswering: """ - Visual Question Answering (VQA) is a task in artificial intelligence that -involves analyzing an image and providing accurate, contextually relevant -answers to questions posed about the visual content of that image. + Visual Question Answering (VQA) is a task in artificial intelligence that + involves analyzing an image and providing accurate, contextually relevant + answers to questions posed about the visual content of that image. """ return VisualQuestionAnswering(*args, asset_id=asset_id, pipeline=self, **kwargs) def document_image_parsing(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> DocumentImageParsing: """ - Document Image Parsing is the process of analyzing and converting scanned or -photographed images of documents into structured, machine-readable formats by -identifying and extracting text, layout, and other relevant information. + Document Image Parsing is the process of analyzing and converting scanned or + photographed images of documents into structured, machine-readable formats by + identifying and extracting text, layout, and other relevant information. """ return DocumentImageParsing(*args, asset_id=asset_id, pipeline=self, **kwargs) - def document_information_extraction(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> DocumentInformationExtraction: + def document_information_extraction( + self, asset_id: Union[str, asset.Asset], *args, **kwargs + ) -> DocumentInformationExtraction: """ - Document Information Extraction is the process of automatically identifying, -extracting, and structuring relevant data from unstructured or semi-structured -documents, such as invoices, receipts, contracts, and forms, to facilitate -easier data management and analysis. + Document Information Extraction is the process of automatically identifying, + extracting, and structuring relevant data from unstructured or semi-structured + documents, such as invoices, receipts, contracts, and forms, to facilitate + easier data management and analysis. 
""" return DocumentInformationExtraction(*args, asset_id=asset_id, pipeline=self, **kwargs) def depth_estimation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> DepthEstimation: """ - Depth estimation is a computational process that determines the distance of -objects from a viewpoint, typically using visual data from cameras or sensors -to create a three-dimensional understanding of a scene. + Depth estimation is a computational process that determines the distance of + objects from a viewpoint, typically using visual data from cameras or sensors + to create a three-dimensional understanding of a scene. """ return DepthEstimation(*args, asset_id=asset_id, pipeline=self, **kwargs) def video_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoGeneration: """ - Video Generation is the process of creating video content through automated or -semi-automated means, often utilizing algorithms, artificial intelligence, or -software tools to produce visual and audio elements that can range from simple -animations to complex, realistic scenes. + Video Generation is the process of creating video content through automated or + semi-automated means, often utilizing algorithms, artificial intelligence, or + software tools to produce visual and audio elements that can range from simple + animations to complex, realistic scenes. """ return VideoGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) - def referenceless_audio_generation_metric(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ReferencelessAudioGenerationMetric: + def referenceless_audio_generation_metric( + self, asset_id: Union[str, asset.Asset], *args, **kwargs + ) -> ReferencelessAudioGenerationMetric: """ - The Referenceless Audio Generation Metric is a tool designed to evaluate the -quality of generated audio content without the need for a reference or original -audio sample for comparison. + The Referenceless Audio Generation Metric is a tool designed to evaluate the + quality of generated audio content without the need for a reference or original + audio sample for comparison. """ return ReferencelessAudioGenerationMetric(*args, asset_id=asset_id, pipeline=self, **kwargs) - def multi_class_image_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> MultiClassImageClassification: + def multi_class_image_classification( + self, asset_id: Union[str, asset.Asset], *args, **kwargs + ) -> MultiClassImageClassification: """ - Multi Class Image Classification is a machine learning task where an algorithm -is trained to categorize images into one of several predefined classes or -categories based on their visual content. + Multi Class Image Classification is a machine learning task where an algorithm + is trained to categorize images into one of several predefined classes or + categories based on their visual content. """ return MultiClassImageClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) def semantic_segmentation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SemanticSegmentation: """ - Semantic segmentation is a computer vision process that involves classifying -each pixel in an image into a predefined category, effectively partitioning the -image into meaningful segments based on the objects or regions they represent. 
+ Semantic segmentation is a computer vision process that involves classifying + each pixel in an image into a predefined category, effectively partitioning the + image into meaningful segments based on the objects or regions they represent. """ return SemanticSegmentation(*args, asset_id=asset_id, pipeline=self, **kwargs) def instance_segmentation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> InstanceSegmentation: """ - Instance segmentation is a computer vision task that involves detecting and -delineating each distinct object within an image, assigning a unique label and -precise boundary to every individual instance of objects, even if they belong -to the same category. + Instance segmentation is a computer vision task that involves detecting and + delineating each distinct object within an image, assigning a unique label and + precise boundary to every individual instance of objects, even if they belong + to the same category. """ return InstanceSegmentation(*args, asset_id=asset_id, pipeline=self, **kwargs) def image_colorization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageColorization: """ - Image colorization is a process that involves adding color to grayscale images, -transforming them from black-and-white to full-color representations, often -using advanced algorithms and machine learning techniques to predict and apply -the appropriate hues and shades. + Image colorization is a process that involves adding color to grayscale images, + transforming them from black-and-white to full-color representations, often + using advanced algorithms and machine learning techniques to predict and apply + the appropriate hues and shades. """ return ImageColorization(*args, asset_id=asset_id, pipeline=self, **kwargs) def audio_generation_metric(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioGenerationMetric: """ - The Audio Generation Metric is a quantitative measure used to evaluate the -quality, accuracy, and overall performance of audio generated by artificial -intelligence systems, often considering factors such as fidelity, -intelligibility, and similarity to human-produced audio. + The Audio Generation Metric is a quantitative measure used to evaluate the + quality, accuracy, and overall performance of audio generated by artificial + intelligence systems, often considering factors such as fidelity, + intelligibility, and similarity to human-produced audio. """ return AudioGenerationMetric(*args, asset_id=asset_id, pipeline=self, **kwargs) def image_impainting(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageImpainting: """ - Image inpainting is a process that involves filling in missing or damaged parts -of an image in a way that is visually coherent and seamlessly blends with the -surrounding areas, often using advanced algorithms and techniques to restore -the image to its original or intended appearance. + Image inpainting is a process that involves filling in missing or damaged parts + of an image in a way that is visually coherent and seamlessly blends with the + surrounding areas, often using advanced algorithms and techniques to restore + the image to its original or intended appearance. 
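The helper names are derived from the platform's function identifiers with hyphens mapped to underscores, which is why spellings such as image_impainting and text_segmenation are preserved verbatim. A hedged sketch of a generic lookup relying on that convention; note that a few names, such as other__multipurpose_, deviate from it:

    def node_for(pipeline, function_id: str, asset_id: str):
        # "speech-recognition" -> pipeline.speech_recognition, and so on.
        helper = getattr(pipeline, function_id.replace("-", "_"))
        return helper(asset_id=asset_id)

    asr = node_for(pipeline, "speech-recognition", "<model-id>")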
""" return ImageImpainting(*args, asset_id=asset_id, pipeline=self, **kwargs) def style_transfer(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> StyleTransfer: """ - Style Transfer is a technique in artificial intelligence that applies the -visual style of one image (such as the brushstrokes of a famous painting) to -the content of another image, effectively blending the artistic elements of the -first image with the subject matter of the second. + Style Transfer is a technique in artificial intelligence that applies the + visual style of one image (such as the brushstrokes of a famous painting) to + the content of another image, effectively blending the artistic elements of the + first image with the subject matter of the second. """ return StyleTransfer(*args, asset_id=asset_id, pipeline=self, **kwargs) - def multi_class_text_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> MultiClassTextClassification: + def multi_class_text_classification( + self, asset_id: Union[str, asset.Asset], *args, **kwargs + ) -> MultiClassTextClassification: """ - Multi Class Text Classification is a natural language processing task that -involves categorizing a given text into one of several predefined classes or -categories based on its content. + Multi Class Text Classification is a natural language processing task that + involves categorizing a given text into one of several predefined classes or + categories based on its content. """ return MultiClassTextClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) def text_embedding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextEmbedding: """ - Text embedding is a process that converts text into numerical vectors, -capturing the semantic meaning and contextual relationships of words or -phrases, enabling machines to understand and analyze natural language more -effectively. + Text embedding is a process that converts text into numerical vectors, + capturing the semantic meaning and contextual relationships of words or + phrases, enabling machines to understand and analyze natural language more + effectively. """ return TextEmbedding(*args, asset_id=asset_id, pipeline=self, **kwargs) - def multi_label_text_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> MultiLabelTextClassification: + def multi_label_text_classification( + self, asset_id: Union[str, asset.Asset], *args, **kwargs + ) -> MultiLabelTextClassification: """ - Multi Label Text Classification is a natural language processing task where a -given text is analyzed and assigned multiple relevant labels or categories from -a predefined set, allowing for the text to belong to more than one category -simultaneously. + Multi Label Text Classification is a natural language processing task where a + given text is analyzed and assigned multiple relevant labels or categories from + a predefined set, allowing for the text to belong to more than one category + simultaneously. """ return MultiLabelTextClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) def text_reconstruction(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextReconstruction: """ - Text Reconstruction is a process that involves piecing together fragmented or -incomplete text data to restore it to its original, coherent form. + Text Reconstruction is a process that involves piecing together fragmented or + incomplete text data to restore it to its original, coherent form. 
""" return TextReconstruction(*args, asset_id=asset_id, pipeline=self, **kwargs) def fact_checking(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> FactChecking: """ - Fact Checking is the process of verifying the accuracy and truthfulness of -information, statements, or claims by cross-referencing with reliable sources -and evidence. + Fact Checking is the process of verifying the accuracy and truthfulness of + information, statements, or claims by cross-referencing with reliable sources + and evidence. """ return FactChecking(*args, asset_id=asset_id, pipeline=self, **kwargs) def speech_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechClassification: """ - Speech Classification is a process that involves analyzing and categorizing -spoken language into predefined categories or classes based on various features -such as tone, pitch, and linguistic content. + Speech Classification is a process that involves analyzing and categorizing + spoken language into predefined categories or classes based on various features + such as tone, pitch, and linguistic content. """ return SpeechClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) def intent_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> IntentClassification: """ - Intent Classification is a natural language processing task that involves -analyzing and categorizing user text input to determine the underlying purpose -or goal behind the communication, such as booking a flight, asking for weather -information, or setting a reminder. + Intent Classification is a natural language processing task that involves + analyzing and categorizing user text input to determine the underlying purpose + or goal behind the communication, such as booking a flight, asking for weather + information, or setting a reminder. """ return IntentClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) def part_of_speech_tagging(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> PartOfSpeechTagging: """ - Part of Speech Tagging is a natural language processing task that involves -assigning each word in a sentence its corresponding part of speech, such as -noun, verb, adjective, or adverb, based on its role and context within the -sentence. + Part of Speech Tagging is a natural language processing task that involves + assigning each word in a sentence its corresponding part of speech, such as + noun, verb, adjective, or adverb, based on its role and context within the + sentence. """ return PartOfSpeechTagging(*args, asset_id=asset_id, pipeline=self, **kwargs) def metric_aggregation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> MetricAggregation: """ - Metric Aggregation is a function that computes and summarizes numerical data by -applying statistical operations, such as averaging, summing, or finding the -minimum and maximum values, to provide insights and facilitate analysis of -large datasets. + Metric Aggregation is a function that computes and summarizes numerical data by + applying statistical operations, such as averaging, summing, or finding the + minimum and maximum values, to provide insights and facilitate analysis of + large datasets. 
""" return MetricAggregation(*args, asset_id=asset_id, pipeline=self, **kwargs) def dialect_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> DialectDetection: """ - Dialect Detection is a function that identifies and classifies the specific -regional or social variations of a language spoken or written by an individual, -enabling the recognition of distinct linguistic patterns and nuances associated -with different dialects. + Dialect Detection is a function that identifies and classifies the specific + regional or social variations of a language spoken or written by an individual, + enabling the recognition of distinct linguistic patterns and nuances associated + with different dialects. """ return DialectDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) def inverse_text_normalization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> InverseTextNormalization: """ - Inverse Text Normalization is the process of converting spoken or written -language in its normalized form, such as numbers, dates, and abbreviations, -back into their original, more complex or detailed textual representations. + Inverse Text Normalization is the process of converting spoken or written + language in its normalized form, such as numbers, dates, and abbreviations, + back into their original, more complex or detailed textual representations. """ return InverseTextNormalization(*args, asset_id=asset_id, pipeline=self, **kwargs) def text_to_audio(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextToAudio: """ - The Text to Audio function converts written text into spoken words, allowing -users to listen to the content instead of reading it. + The Text to Audio function converts written text into spoken words, allowing + users to listen to the content instead of reading it. """ return TextToAudio(*args, asset_id=asset_id, pipeline=self, **kwargs) def fill_text_mask(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> FillTextMask: """ - The "Fill Text Mask" function takes a text input with masked or placeholder -characters and replaces those placeholders with specified or contextually -appropriate characters to generate a complete and coherent text output. + The "Fill Text Mask" function takes a text input with masked or placeholder + characters and replaces those placeholders with specified or contextually + appropriate characters to generate a complete and coherent text output. """ return FillTextMask(*args, asset_id=asset_id, pipeline=self, **kwargs) def video_content_moderation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoContentModeration: """ - Video Content Moderation is the process of reviewing, analyzing, and filtering -video content to ensure it adheres to community guidelines, legal standards, -and platform policies, thereby preventing the dissemination of inappropriate, -harmful, or illegal material. + Video Content Moderation is the process of reviewing, analyzing, and filtering + video content to ensure it adheres to community guidelines, legal standards, + and platform policies, thereby preventing the dissemination of inappropriate, + harmful, or illegal material. 
""" return VideoContentModeration(*args, asset_id=asset_id, pipeline=self, **kwargs) def extract_audio_from_video(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ExtractAudioFromVideo: """ - The "Extract Audio From Video" function allows users to separate and save the -audio track from a video file, enabling them to obtain just the sound without -the accompanying visual content. + The "Extract Audio From Video" function allows users to separate and save the + audio track from a video file, enabling them to obtain just the sound without + the accompanying visual content. """ return ExtractAudioFromVideo(*args, asset_id=asset_id, pipeline=self, **kwargs) def image_compression(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageCompression: """ - Image compression is a process that reduces the file size of an image by -removing redundant or non-essential data, while maintaining an acceptable level -of visual quality. + Image compression is a process that reduces the file size of an image by + removing redundant or non-essential data, while maintaining an acceptable level + of visual quality. """ return ImageCompression(*args, asset_id=asset_id, pipeline=self, **kwargs) - def multilingual_speech_recognition(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> MultilingualSpeechRecognition: + def multilingual_speech_recognition( + self, asset_id: Union[str, asset.Asset], *args, **kwargs + ) -> MultilingualSpeechRecognition: """ - Multilingual Speech Recognition is a technology that enables the automatic -transcription of spoken language into text across multiple languages, allowing -for seamless communication and understanding in diverse linguistic contexts. + Multilingual Speech Recognition is a technology that enables the automatic + transcription of spoken language into text across multiple languages, allowing + for seamless communication and understanding in diverse linguistic contexts. """ return MultilingualSpeechRecognition(*args, asset_id=asset_id, pipeline=self, **kwargs) - def referenceless_text_generation_metric(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ReferencelessTextGenerationMetric: + def referenceless_text_generation_metric( + self, asset_id: Union[str, asset.Asset], *args, **kwargs + ) -> ReferencelessTextGenerationMetric: """ - The Referenceless Text Generation Metric is a method for evaluating the quality -of generated text without requiring a reference text for comparison, often -leveraging models or algorithms to assess coherence, relevance, and fluency -based on intrinsic properties of the text itself. + The Referenceless Text Generation Metric is a method for evaluating the quality + of generated text without requiring a reference text for comparison, often + leveraging models or algorithms to assess coherence, relevance, and fluency + based on intrinsic properties of the text itself. """ return ReferencelessTextGenerationMetric(*args, asset_id=asset_id, pipeline=self, **kwargs) def text_generation_metric_default(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextGenerationMetricDefault: """ - The "Text Generation Metric Default" function provides a standard set of -evaluation metrics for assessing the quality and performance of text generation -models. + The "Text Generation Metric Default" function provides a standard set of + evaluation metrics for assessing the quality and performance of text generation + models. 
""" return TextGenerationMetricDefault(*args, asset_id=asset_id, pipeline=self, **kwargs) def noise_removal(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> NoiseRemoval: """ - Noise Removal is a process that involves identifying and eliminating unwanted -random variations or disturbances from an audio signal to enhance the clarity -and quality of the underlying information. + Noise Removal is a process that involves identifying and eliminating unwanted + random variations or disturbances from an audio signal to enhance the clarity + and quality of the underlying information. """ return NoiseRemoval(*args, asset_id=asset_id, pipeline=self, **kwargs) def audio_reconstruction(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioReconstruction: """ - Audio Reconstruction is the process of restoring or recreating audio signals -from incomplete, damaged, or degraded recordings to achieve a high-quality, -accurate representation of the original sound. + Audio Reconstruction is the process of restoring or recreating audio signals + from incomplete, damaged, or degraded recordings to achieve a high-quality, + accurate representation of the original sound. """ return AudioReconstruction(*args, asset_id=asset_id, pipeline=self, **kwargs) def voice_cloning(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VoiceCloning: """ - Voice cloning is a technology that uses artificial intelligence to create a -digital replica of a person's voice, allowing for the generation of speech that -mimics the tone, pitch, and speaking style of the original speaker. + Voice cloning is a technology that uses artificial intelligence to create a + digital replica of a person's voice, allowing for the generation of speech that + mimics the tone, pitch, and speaking style of the original speaker. """ return VoiceCloning(*args, asset_id=asset_id, pipeline=self, **kwargs) def diacritization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Diacritization: """ - Diacritization is the process of adding diacritical marks to letters in a text -to indicate pronunciation, stress, tone, or meaning, often used in languages -such as Arabic, Hebrew, and Vietnamese to provide clarity and accuracy in -written communication. + Diacritization is the process of adding diacritical marks to letters in a text + to indicate pronunciation, stress, tone, or meaning, often used in languages + such as Arabic, Hebrew, and Vietnamese to provide clarity and accuracy in + written communication. """ return Diacritization(*args, asset_id=asset_id, pipeline=self, **kwargs) def audio_emotion_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioEmotionDetection: """ - Audio Emotion Detection is a technology that analyzes vocal characteristics and -patterns in audio recordings to identify and classify the emotional state of -the speaker. + Audio Emotion Detection is a technology that analyzes vocal characteristics and + patterns in audio recordings to identify and classify the emotional state of + the speaker. """ return AudioEmotionDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) def text_summarization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextSummarization: """ - Text summarization is the process of condensing a large body of text into a -shorter version, capturing the main points and essential information while -maintaining coherence and meaning. 
+ Text summarization is the process of condensing a large body of text into a + shorter version, capturing the main points and essential information while + maintaining coherence and meaning. """ return TextSummarization(*args, asset_id=asset_id, pipeline=self, **kwargs) def entity_linking(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> EntityLinking: """ - Entity Linking is the process of identifying and connecting mentions of -entities within a text to their corresponding entries in a structured knowledge -base, thereby enabling the disambiguation of terms and enhancing the -understanding of the text's context. + Entity Linking is the process of identifying and connecting mentions of + entities within a text to their corresponding entries in a structured knowledge + base, thereby enabling the disambiguation of terms and enhancing the + understanding of the text's context. """ return EntityLinking(*args, asset_id=asset_id, pipeline=self, **kwargs) def text_generation_metric(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextGenerationMetric: """ - A Text Generation Metric is a quantitative measure used to evaluate the quality -and effectiveness of text produced by natural language processing models, often -assessing aspects such as coherence, relevance, fluency, and adherence to given -prompts or instructions. + A Text Generation Metric is a quantitative measure used to evaluate the quality + and effectiveness of text produced by natural language processing models, often + assessing aspects such as coherence, relevance, fluency, and adherence to given + prompts or instructions. """ return TextGenerationMetric(*args, asset_id=asset_id, pipeline=self, **kwargs) def split_on_linebreak(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SplitOnLinebreak: """ - The "Split On Linebreak" function divides a given string into a list of -substrings, using linebreaks (newline characters) as the points of separation. + The "Split On Linebreak" function divides a given string into a list of + substrings, using linebreaks (newline characters) as the points of separation. """ return SplitOnLinebreak(*args, asset_id=asset_id, pipeline=self, **kwargs) def sentiment_analysis(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SentimentAnalysis: """ - Sentiment Analysis is a natural language processing technique used to determine -and classify the emotional tone or subjective information expressed in a piece -of text, such as identifying whether the sentiment is positive, negative, or -neutral. + Sentiment Analysis is a natural language processing technique used to determine + and classify the emotional tone or subjective information expressed in a piece + of text, such as identifying whether the sentiment is positive, negative, or + neutral. """ return SentimentAnalysis(*args, asset_id=asset_id, pipeline=self, **kwargs) def keyword_spotting(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> KeywordSpotting: """ - Keyword Spotting is a function that enables the detection and identification of -specific words or phrases within a stream of audio, often used in voice- -activated systems to trigger actions or commands based on recognized keywords. + Keyword Spotting is a function that enables the detection and identification of + specific words or phrases within a stream of audio, often used in voice- + activated systems to trigger actions or commands based on recognized keywords. 
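For plain strings, the behaviour described for split_on_linebreak above corresponds to str.splitlines(); the snippet below is illustrative only and does not invoke the node itself:

    "first line\nsecond line".splitlines()  # -> ['first line', 'second line']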
""" return KeywordSpotting(*args, asset_id=asset_id, pipeline=self, **kwargs) def text_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextClassification: """ - Text Classification is a natural language processing task that involves -categorizing text into predefined labels or classes based on its content, -enabling automated organization, filtering, and analysis of large volumes of -textual data. + Text Classification is a natural language processing task that involves + categorizing text into predefined labels or classes based on its content, + enabling automated organization, filtering, and analysis of large volumes of + textual data. """ return TextClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) def other__multipurpose_(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> OtherMultipurpose: """ - The "Other (Multipurpose)" function serves as a versatile category designed to -accommodate a wide range of tasks and activities that do not fit neatly into -predefined classifications, offering flexibility and adaptability for various -needs. + The "Other (Multipurpose)" function serves as a versatile category designed to + accommodate a wide range of tasks and activities that do not fit neatly into + predefined classifications, offering flexibility and adaptability for various + needs. """ return OtherMultipurpose(*args, asset_id=asset_id, pipeline=self, **kwargs) def speech_synthesis(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechSynthesis: """ - Speech synthesis is the artificial production of human speech, typically -achieved through software or hardware systems that convert text into spoken -words, enabling machines to communicate verbally with users. + Speech synthesis is the artificial production of human speech, typically + achieved through software or hardware systems that convert text into spoken + words, enabling machines to communicate verbally with users. """ return SpeechSynthesis(*args, asset_id=asset_id, pipeline=self, **kwargs) def audio_intent_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioIntentDetection: """ - Audio Intent Detection is a process that involves analyzing audio signals to -identify and interpret the underlying intentions or purposes behind spoken -words, enabling systems to understand and respond appropriately to human -speech. + Audio Intent Detection is a process that involves analyzing audio signals to + identify and interpret the underlying intentions or purposes behind spoken + words, enabling systems to understand and respond appropriately to human + speech. """ return AudioIntentDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) def video_label_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoLabelDetection: """ - Video Label Detection is a function that automatically identifies and tags -various objects, scenes, activities, and other relevant elements within a -video, providing descriptive labels that enhance searchability and content -organization. + Video Label Detection is a function that automatically identifies and tags + various objects, scenes, activities, and other relevant elements within a + video, providing descriptive labels that enhance searchability and content + organization. 
""" return VideoLabelDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) def asr_quality_estimation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AsrQualityEstimation: """ - ASR Quality Estimation is a process that evaluates the accuracy and reliability -of automatic speech recognition systems by analyzing their performance in -transcribing spoken language into text. + ASR Quality Estimation is a process that evaluates the accuracy and reliability + of automatic speech recognition systems by analyzing their performance in + transcribing spoken language into text. """ return AsrQualityEstimation(*args, asset_id=asset_id, pipeline=self, **kwargs) def audio_transcript_analysis(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioTranscriptAnalysis: """ - Audio Transcript Analysis is a process that involves converting spoken language -from audio recordings into written text, followed by examining and interpreting -the transcribed content to extract meaningful insights, identify patterns, and -derive actionable information. + Audio Transcript Analysis is a process that involves converting spoken language + from audio recordings into written text, followed by examining and interpreting + the transcribed content to extract meaningful insights, identify patterns, and + derive actionable information. """ return AudioTranscriptAnalysis(*args, asset_id=asset_id, pipeline=self, **kwargs) def search(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Search: """ - The "Search" function allows users to input keywords or phrases to quickly -locate specific information, files, or content within a database, website, or -application. + The "Search" function allows users to input keywords or phrases to quickly + locate specific information, files, or content within a database, website, or + application. """ return Search(*args, asset_id=asset_id, pipeline=self, **kwargs) def video_forced_alignment(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoForcedAlignment: """ - Video Forced Alignment is a process that synchronizes video footage with -corresponding audio tracks by precisely aligning the visual and auditory -elements, ensuring that the movements of speakers' lips match the spoken words. + Video Forced Alignment is a process that synchronizes video footage with + corresponding audio tracks by precisely aligning the visual and auditory + elements, ensuring that the movements of speakers' lips match the spoken words. """ return VideoForcedAlignment(*args, asset_id=asset_id, pipeline=self, **kwargs) def viseme_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VisemeGeneration: """ - Viseme Generation is the process of creating visual representations of -phonemes, which are the distinct units of sound in speech, to synchronize lip -movements with spoken words in animations or virtual avatars. + Viseme Generation is the process of creating visual representations of + phonemes, which are the distinct units of sound in speech, to synchronize lip + movements with spoken words in animations or virtual avatars. """ return VisemeGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) def topic_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TopicClassification: """ - Topic Classification is a natural language processing function that categorizes -text into predefined topics or subjects based on its content, enabling -efficient organization and retrieval of information. 
+ Topic Classification is a natural language processing function that categorizes + text into predefined topics or subjects based on its content, enabling + efficient organization and retrieval of information. """ return TopicClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) - def offensive_language_identification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> OffensiveLanguageIdentification: + def offensive_language_identification( + self, asset_id: Union[str, asset.Asset], *args, **kwargs + ) -> OffensiveLanguageIdentification: """ - Offensive Language Identification is a function that analyzes text to detect -and flag language that is abusive, harmful, or inappropriate, helping to -maintain a respectful and safe communication environment. + Offensive Language Identification is a function that analyzes text to detect + and flag language that is abusive, harmful, or inappropriate, helping to + maintain a respectful and safe communication environment. """ return OffensiveLanguageIdentification(*args, asset_id=asset_id, pipeline=self, **kwargs) def speech_translation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechTranslation: """ - Speech Translation is a technology that converts spoken language in real-time -from one language to another, enabling seamless communication between speakers -of different languages. + Speech Translation is a technology that converts spoken language in real-time + from one language to another, enabling seamless communication between speakers + of different languages. """ return SpeechTranslation(*args, asset_id=asset_id, pipeline=self, **kwargs) def speaker_diarization_audio(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeakerDiarizationAudio: """ - Speaker Diarization Audio is a process that involves segmenting an audio -recording into distinct sections, each corresponding to a different speaker, in -order to identify and differentiate between multiple speakers within the same -audio stream. + Speaker Diarization Audio is a process that involves segmenting an audio + recording into distinct sections, each corresponding to a different speaker, in + order to identify and differentiate between multiple speakers within the same + audio stream. """ return SpeakerDiarizationAudio(*args, asset_id=asset_id, pipeline=self, **kwargs) def audio_transcript_improvement(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioTranscriptImprovement: """ - Audio Transcript Improvement is a function that enhances the accuracy and -clarity of transcribed audio recordings by correcting errors, refining -language, and ensuring the text faithfully represents the original spoken -content. + Audio Transcript Improvement is a function that enhances the accuracy and + clarity of transcribed audio recordings by correcting errors, refining + language, and ensuring the text faithfully represents the original spoken + content. 
""" return AudioTranscriptImprovement(*args, asset_id=asset_id, pipeline=self, **kwargs) - def speech_non_speech_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechNonSpeechClassification: + def speech_non_speech_classification( + self, asset_id: Union[str, asset.Asset], *args, **kwargs + ) -> SpeechNonSpeechClassification: """ - The function "Speech or Non-Speech Classification" is designed to analyze audio -input and determine whether the sound is human speech or non-speech noise, -enabling applications such as voice recognition systems to filter out -irrelevant background sounds. + The function "Speech or Non-Speech Classification" is designed to analyze audio + input and determine whether the sound is human speech or non-speech noise, + enabling applications such as voice recognition systems to filter out + irrelevant background sounds. """ return SpeechNonSpeechClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) def text_denormalization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextDenormalization: """ - Text Denormalization is the process of converting abbreviated, contracted, or -otherwise simplified text into its full, standard form, often to improve -readability and ensure consistency in natural language processing tasks. + Text Denormalization is the process of converting abbreviated, contracted, or + otherwise simplified text into its full, standard form, often to improve + readability and ensure consistency in natural language processing tasks. """ return TextDenormalization(*args, asset_id=asset_id, pipeline=self, **kwargs) def image_content_moderation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageContentModeration: """ - Image Content Moderation is a process that involves analyzing and filtering -images to detect and manage inappropriate, harmful, or sensitive content, -ensuring compliance with community guidelines and legal standards. + Image Content Moderation is a process that involves analyzing and filtering + images to detect and manage inappropriate, harmful, or sensitive content, + ensuring compliance with community guidelines and legal standards. """ return ImageContentModeration(*args, asset_id=asset_id, pipeline=self, **kwargs) - def referenceless_text_generation_metric_default(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ReferencelessTextGenerationMetricDefault: + def referenceless_text_generation_metric_default( + self, asset_id: Union[str, asset.Asset], *args, **kwargs + ) -> ReferencelessTextGenerationMetricDefault: """ - The Referenceless Text Generation Metric Default is a function designed to -evaluate the quality of generated text without relying on reference texts for -comparison. + The Referenceless Text Generation Metric Default is a function designed to + evaluate the quality of generated text without relying on reference texts for + comparison. """ return ReferencelessTextGenerationMetricDefault(*args, asset_id=asset_id, pipeline=self, **kwargs) def named_entity_recognition(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> NamedEntityRecognition: """ - Named Entity Recognition (NER) is a natural language processing task that -involves identifying and classifying proper nouns in text into predefined -categories such as names of people, organizations, locations, dates, and other -entities. 
+ Named Entity Recognition (NER) is a natural language processing task that + involves identifying and classifying proper nouns in text into predefined + categories such as names of people, organizations, locations, dates, and other + entities. """ return NamedEntityRecognition(*args, asset_id=asset_id, pipeline=self, **kwargs) def text_content_moderation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextContentModeration: """ - Text Content Moderation is the process of reviewing, filtering, and managing -user-generated content to ensure it adheres to community guidelines, legal -standards, and platform policies, thereby maintaining a safe and respectful -online environment. + Text Content Moderation is the process of reviewing, filtering, and managing + user-generated content to ensure it adheres to community guidelines, legal + standards, and platform policies, thereby maintaining a safe and respectful + online environment. """ return TextContentModeration(*args, asset_id=asset_id, pipeline=self, **kwargs) def speaker_diarization_video(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeakerDiarizationVideo: """ - The Speaker Diarization Video function identifies and segments different -speakers in a video, attributing portions of the audio to individual speakers -to facilitate analysis and understanding of multi-speaker conversations. + The Speaker Diarization Video function identifies and segments different + speakers in a video, attributing portions of the audio to individual speakers + to facilitate analysis and understanding of multi-speaker conversations. """ return SpeakerDiarizationVideo(*args, asset_id=asset_id, pipeline=self, **kwargs) def split_on_silence(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SplitOnSilence: """ - The "Split On Silence" function divides an audio recording into separate -segments based on periods of silence, allowing for easier editing and analysis -of individual sections. + The "Split On Silence" function divides an audio recording into separate + segments based on periods of silence, allowing for easier editing and analysis + of individual sections. """ return SplitOnSilence(*args, asset_id=asset_id, pipeline=self, **kwargs) def emotion_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> EmotionDetection: """ - Emotion Detection is a process that involves analyzing text to identify and -categorize the emotional states or sentiments expressed by individuals, such as -happiness, sadness, anger, or fear. + Emotion Detection is a process that involves analyzing text to identify and + categorize the emotional states or sentiments expressed by individuals, such as + happiness, sadness, anger, or fear. """ return EmotionDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) def text_spam_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextSpamDetection: """ - Text Spam Detection is a process that involves analyzing and identifying -unsolicited or irrelevant messages within text communications, typically using -algorithms and machine learning techniques to filter out spam and ensure the -integrity of the communication platform. + Text Spam Detection is a process that involves analyzing and identifying + unsolicited or irrelevant messages within text communications, typically using + algorithms and machine learning techniques to filter out spam and ensure the + integrity of the communication platform. 
""" return TextSpamDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) def translation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Translation: """ - Translation is the process of converting text from one language into an -equivalent text in another language, preserving the original meaning and -context. + Translation is the process of converting text from one language into an + equivalent text in another language, preserving the original meaning and + context. """ return Translation(*args, asset_id=asset_id, pipeline=self, **kwargs) def voice_activity_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VoiceActivityDetection: """ - Voice Activity Detection (VAD) is a technology that identifies the presence or -absence of human speech within an audio signal, enabling systems to distinguish -between spoken words and background noise. + Voice Activity Detection (VAD) is a technology that identifies the presence or + absence of human speech within an audio signal, enabling systems to distinguish + between spoken words and background noise. """ return VoiceActivityDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) def speech_embedding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechEmbedding: """ - Speech Embedding is a process that transforms spoken language into a fixed- -dimensional vector representation, capturing essential features and -characteristics of the speech for tasks such as recognition, classification, -and analysis. + Speech Embedding is a process that transforms spoken language into a fixed- + dimensional vector representation, capturing essential features and + characteristics of the speech for tasks such as recognition, classification, + and analysis. """ return SpeechEmbedding(*args, asset_id=asset_id, pipeline=self, **kwargs) def subtitling_translation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SubtitlingTranslation: """ - Subtitling Translation is the process of converting spoken dialogue from one -language into written text in another language, which is then displayed on- -screen to aid viewers in understanding the content. + Subtitling Translation is the process of converting spoken dialogue from one + language into written text in another language, which is then displayed on- + screen to aid viewers in understanding the content. """ return SubtitlingTranslation(*args, asset_id=asset_id, pipeline=self, **kwargs) def text_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextGeneration: """ - Text Generation is a process in which artificial intelligence models, such as -neural networks, produce coherent and contextually relevant text based on a -given input or prompt, often mimicking human writing styles and patterns. + Text Generation is a process in which artificial intelligence models, such as + neural networks, produce coherent and contextually relevant text based on a + given input or prompt, often mimicking human writing styles and patterns. """ return TextGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) def video_understanding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoUnderstanding: """ - Video Understanding is the process of analyzing and interpreting video content -to extract meaningful information, such as identifying objects, actions, -events, and contextual relationships within the footage. 
+ Video Understanding is the process of analyzing and interpreting video content + to extract meaningful information, such as identifying objects, actions, + events, and contextual relationships within the footage. """ return VideoUnderstanding(*args, asset_id=asset_id, pipeline=self, **kwargs) def text_to_video_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextToVideoGeneration: """ - Text To Video Generation is a process that converts written descriptions or -scripts into dynamic, visual video content using advanced algorithms and -artificial intelligence. + Text To Video Generation is a process that converts written descriptions or + scripts into dynamic, visual video content using advanced algorithms and + artificial intelligence. """ return TextToVideoGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) def text_normalization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextNormalization: """ - Text normalization is the process of transforming text into a standard, -consistent format by correcting spelling errors, converting all characters to a -uniform case, removing punctuation, and expanding abbreviations to improve the -text's readability and usability for further processing or analysis. + Text normalization is the process of transforming text into a standard, + consistent format by correcting spelling errors, converting all characters to a + uniform case, removing punctuation, and expanding abbreviations to improve the + text's readability and usability for further processing or analysis. """ return TextNormalization(*args, asset_id=asset_id, pipeline=self, **kwargs) def speech_recognition(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechRecognition: """ - Speech recognition is a technology that enables a computer or device to -identify and process spoken language, converting it into text. + Speech recognition is a technology that enables a computer or device to + identify and process spoken language, converting it into text. """ return SpeechRecognition(*args, asset_id=asset_id, pipeline=self, **kwargs) def subtitling(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Subtitling: """ - Subtitling is the process of displaying written text on a screen to represent -the spoken dialogue, narration, or other audio elements in a video, typically -to aid viewers who are deaf or hard of hearing, or to provide translations for -audiences who speak different languages. + Subtitling is the process of displaying written text on a screen to represent + the spoken dialogue, narration, or other audio elements in a video, typically + to aid viewers who are deaf or hard of hearing, or to provide translations for + audiences who speak different languages. """ return Subtitling(*args, asset_id=asset_id, pipeline=self, **kwargs) def classification_metric(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ClassificationMetric: """ - A Classification Metric is a quantitative measure used to evaluate the quality -and effectiveness of classification models. + A Classification Metric is a quantitative measure used to evaluate the quality + and effectiveness of classification models. 
""" return ClassificationMetric(*args, asset_id=asset_id, pipeline=self, **kwargs) def text_to_image_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextToImageGeneration: """ - Text To Image Generation is a process where a system creates visual images -based on descriptive text input, translating written language into -corresponding graphical representations. + Text To Image Generation is a process where a system creates visual images + based on descriptive text input, translating written language into + corresponding graphical representations. """ return TextToImageGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) - diff --git a/aixplain/modules/wallet.py b/aixplain/modules/wallet.py index 2b2b1cd4..e05e2d3a 100644 --- a/aixplain/modules/wallet.py +++ b/aixplain/modules/wallet.py @@ -32,4 +32,4 @@ def __init__(self, total_balance: float, reserved_balance: float): """ self.total_balance = total_balance self.reserved_balance = reserved_balance - self.available_balance = total_balance-reserved_balance \ No newline at end of file + self.available_balance = total_balance - reserved_balance diff --git a/aixplain/utils/__init__.py b/aixplain/utils/__init__.py index 8e82233f..552d61e3 100644 --- a/aixplain/utils/__init__.py +++ b/aixplain/utils/__init__.py @@ -18,4 +18,4 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -""" \ No newline at end of file +""" diff --git a/aixplain/utils/config.py b/aixplain/utils/config.py index 59805c60..03bbdccf 100644 --- a/aixplain/utils/config.py +++ b/aixplain/utils/config.py @@ -19,7 +19,7 @@ logger = logging.getLogger(__name__) BACKEND_URL = os.getenv("BACKEND_URL", "https://platform-api.aixplain.com") -MODELS_RUN_URL = os.getenv("MODELS_RUN_URL", "https://models.aixplain.com") +MODELS_RUN_URL = os.getenv("MODELS_RUN_URL", "https://models.aixplain.com/api/v1/execute") # GET THE API KEY FROM CMD TEAM_API_KEY = os.getenv("TEAM_API_KEY", "") AIXPLAIN_API_KEY = os.getenv("AIXPLAIN_API_KEY", "") diff --git a/aixplain/utils/convert_datatype_utils.py b/aixplain/utils/convert_datatype_utils.py index 4d9f321f..00dff186 100644 --- a/aixplain/utils/convert_datatype_utils.py +++ b/aixplain/utils/convert_datatype_utils.py @@ -14,12 +14,12 @@ limitations under the License. 
""" -from typing import Union, Dict, List +from typing import Union, Dict, List from aixplain.modules.metadata import MetaData def dict_to_metadata(metadatas: List[Union[Dict, MetaData]]) -> None: - + """Convert all the Dicts to MetaData Args: @@ -32,5 +32,3 @@ def dict_to_metadata(metadatas: List[Union[Dict, MetaData]]) -> None: metadatas[i] = MetaData(**metadatas[i]) except TypeError: raise TypeError(f"Data Asset Onboarding Error: One or more elements in the metadata_schema are not well-structured") - - \ No newline at end of file diff --git a/docs/streaming/aixplain_diarization_streaming_client.py b/docs/streaming/aixplain_diarization_streaming_client.py index 799c63a7..6689c855 100644 --- a/docs/streaming/aixplain_diarization_streaming_client.py +++ b/docs/streaming/aixplain_diarization_streaming_client.py @@ -25,6 +25,7 @@ FRAME_RATE = 16000 + def generate_payloads(file_path, latency): stream_configuration = pb.DiarizationRequest( config=pb.AudioConfig(encoding="LINEAR16", hertz=FRAME_RATE, language_code="en"), @@ -33,60 +34,68 @@ def generate_payloads(file_path, latency): ) yield stream_configuration # Iterate over the raw bytes in chunks - chunk_size = 16000 # half a second of audio + chunk_size = 16000 # half a second of audio i = 0 with open(file_path, "rb") as audio_file: while True: chunk = audio_file.read(chunk_size) if not chunk: break - logging.info(f'Sending chunk {i}') + logging.info(f"Sending chunk {i}") payload = pb.DiarizationRequest(audio_content=chunk) yield payload i += 1 - time.sleep(0.5) # simulate streaming by introducing sleep + time.sleep(0.5) # simulate streaming by introducing sleep + def grpc_duration_to_seconds(duration): seconds = float(duration.seconds) nanos = float(duration.nanos) / 1e9 return seconds + nanos + def consume_results(response: List[pb.DiarizationResponse]): for inference in response: if inference.is_final: - logging.info(f'Received is_final={inference.is_final}. total_time={inference.end_time.seconds}.{str(inference.end_time.nanos)[:3]}') + logging.info( + f"Received is_final={inference.is_final}. 
total_time={inference.end_time.seconds}.{str(inference.end_time.nanos)[:3]}" + ) if len(inference.segments): - logging.info(f'Turns:') + logging.info(f"Turns:") for segment in inference.segments: - logging.info(f"{segment.speaker_tag} \ - start:{grpc_duration_to_seconds(segment.start_time)}\tend:{grpc_duration_to_seconds(segment.end_time)}") + logging.info( + f"{segment.speaker_tag} \ + start:{grpc_duration_to_seconds(segment.start_time)}\tend:{grpc_duration_to_seconds(segment.end_time)}" + ) + def _stream_file(channel, file_path, latency): stub = aixplain_diarization_streaming_pb2_grpc.AixplainDiarizationStreamingStub(channel) response = stub.Diarize(generate_payloads(file_path, latency)) consume_results(response) + def run_insecure(host, file_path, latency): - with grpc.insecure_channel(host, options=(('grpc.ssl_target_name_override', host),)) as channel: + with grpc.insecure_channel(host, options=(("grpc.ssl_target_name_override", host),)) as channel: _stream_file(channel, file_path, latency) + def run(host, client_cert, client_key, ca_cert, file_path, latency): def create_secure_channel(host, client_cert, client_key, ca_cert): - with open(client_cert, 'rb') as f: + with open(client_cert, "rb") as f: client_cert_data = f.read() - with open(client_key, 'rb') as f: + with open(client_key, "rb") as f: client_key_data = f.read() - with open(ca_cert, 'rb') as f: + with open(ca_cert, "rb") as f: ca_cert_data = f.read() credentials = grpc.ssl_channel_credentials( - root_certificates=ca_cert_data, - private_key=client_key_data, - certificate_chain=client_cert_data + root_certificates=ca_cert_data, private_key=client_key_data, certificate_chain=client_cert_data ) return grpc.secure_channel(host, credentials) + with create_secure_channel(host, client_cert, client_key, ca_cert) as channel: _stream_file(channel, file_path, latency) @@ -96,13 +105,13 @@ def create_secure_channel(host, client_cert, client_key, ca_cert): logging.basicConfig(level=logging.INFO, format=log_format) parser = argparse.ArgumentParser(description="aiXplain speech recognition streaming client.") - parser.add_argument('--addr', default='localhost:50051', help='the address to connect to (default "localhost:50051")') - parser.add_argument('--cacert', default='./client-crt/ca.crt', help='ca cert for mTLS (default "./client-crt/ca.crt")') - parser.add_argument('--cert', default='./client-crt/tls.crt', help='client cert for mTLS (default "./client-crt/tls.crt")') - parser.add_argument('--key', default='./client-crt/tls.key', help='client key for mTLS (default "./client-crt/tls.key")') - parser.add_argument('--insecure', action='store_true', help='use insecure connection (no mTLS)') - parser.add_argument('--file-path', help='audio file to stream from') - parser.add_argument('--latency', type=float, help='Model latency') + parser.add_argument("--addr", default="localhost:50051", help='the address to connect to (default "localhost:50051")') + parser.add_argument("--cacert", default="./client-crt/ca.crt", help='ca cert for mTLS (default "./client-crt/ca.crt")') + parser.add_argument("--cert", default="./client-crt/tls.crt", help='client cert for mTLS (default "./client-crt/tls.crt")') + parser.add_argument("--key", default="./client-crt/tls.key", help='client key for mTLS (default "./client-crt/tls.key")') + parser.add_argument("--insecure", action="store_true", help="use insecure connection (no mTLS)") + parser.add_argument("--file-path", help="audio file to stream from") + parser.add_argument("--latency", type=float, help="Model 
latency") args = parser.parse_args() diff --git a/docs/streaming/aixplain_speech_transcription_streaming_client.py b/docs/streaming/aixplain_speech_transcription_streaming_client.py index 97f4e5dc..87fb3a8a 100644 --- a/docs/streaming/aixplain_speech_transcription_streaming_client.py +++ b/docs/streaming/aixplain_speech_transcription_streaming_client.py @@ -25,19 +25,21 @@ import aixplain_speech_transcription_streaming_pb2_grpc FRAME_RATE = 16000 -FFMPEG_FORMAT = 'wav' +FFMPEG_FORMAT = "wav" -PS_SUBTITLE = 'subtitle' -PS_LOG = 'log' +PS_SUBTITLE = "subtitle" +PS_LOG = "log" RED = "\033[0;31m" GREEN = "\033[0;32m" DEFAULT = "\033[0;97m" + def grpc_duration_to_seconds(duration): seconds = float(duration.seconds) nanos = float(duration.nanos) / 1e9 return seconds + nanos + def generate_payloads(file_path): # uncomment this if your audio file is not compatible stream_configuration = pb.SpeechRecognitionRequest( @@ -45,78 +47,87 @@ def generate_payloads(file_path): ) yield stream_configuration # Iterate over the raw bytes in chunks - chunk_size = 16000 # half a second of audio + chunk_size = 16000 # half a second of audio chunk_period = 0.5 - logging.info(f'Sending chunks...') + logging.info(f"Sending chunks...") with open(file_path, "rb") as audio_file: i = 0 while True: chunk = audio_file.read(chunk_size) if not chunk: break - time.sleep((len(chunk)/chunk_size)*chunk_period) # simulate streaming by introducing sleep - logging.debug(f'Sending chunk {i}') + time.sleep((len(chunk) / chunk_size) * chunk_period) # simulate streaming by introducing sleep + logging.debug(f"Sending chunk {i}") payload = pb.SpeechRecognitionRequest(audio_content=chunk) yield payload i += 1 + def consume_results(response: List[pb.SpeechRecognitionResponse], print_style): for i, inference in enumerate(response): if i == 0: - logging.info(f'Detected language {inference.language_code=}') + logging.info(f"Detected language {inference.language_code=}") if len(inference.hypotheses): # get the top hypothesis hypothesis = inference.hypotheses[0] transcript, confidence = hypothesis.transcript, hypothesis.confidence if inference.is_final: - logging.info(f't={grpc_duration_to_seconds(inference.end_time):.3f} conf={confidence:.2f} FINAL="{transcript}"') + logging.info( + f't={grpc_duration_to_seconds(inference.end_time):.3f} conf={confidence:.2f} FINAL="{transcript}"' + ) if print_style == PS_SUBTITLE: sys.stdout.write(GREEN) sys.stdout.write("\033[K") - sys.stdout.write(f'{grpc_duration_to_seconds(inference.end_time):.3f}' + ": " + transcript + "\n") + sys.stdout.write(f"{grpc_duration_to_seconds(inference.end_time):.3f}" + ": " + transcript + "\n") else: - logging.info(f't={grpc_duration_to_seconds(inference.end_time):.3f} conf={confidence:.2f} chunk="{transcript}"') + logging.info( + f't={grpc_duration_to_seconds(inference.end_time):.3f} conf={confidence:.2f} chunk="{transcript}"' + ) if print_style == PS_SUBTITLE: sys.stdout.write(RED) sys.stdout.write("\033[K") - sys.stdout.write(f'{grpc_duration_to_seconds(inference.end_time):.3f}' + ": " + transcript + "\r") + sys.stdout.write(f"{grpc_duration_to_seconds(inference.end_time):.3f}" + ": " + transcript + "\r") for word in hypothesis.words: - logging.info(f'Word: {word.word.ljust(12)} ' - f'Start-End: {grpc_duration_to_seconds(word.start_time):.3f}-{grpc_duration_to_seconds(word.end_time):.3f} ' - f'Confidence: {word.confidence:.3f} ') + logging.info( + f"Word: {word.word.ljust(12)} " + f"Start-End: 
{grpc_duration_to_seconds(word.start_time):.3f}-{grpc_duration_to_seconds(word.end_time):.3f} " + f"Confidence: {word.confidence:.3f} " + ) else: # called at the end - logging.info(f'{inference.is_final=} server processing_time={grpc_duration_to_seconds(inference.end_time):.3f}') + logging.info(f"{inference.is_final=} server processing_time={grpc_duration_to_seconds(inference.end_time):.3f}") if print_style == PS_SUBTITLE: sys.stdout.write(DEFAULT) sys.stdout.write("Exiting...\n") + def _stream_file(channel, file_path, print_style): stub = aixplain_speech_transcription_streaming_pb2_grpc.AixplainSpeechStreamingStub(channel) response = stub.SpeechRecognize(generate_payloads(file_path)) consume_results(response, print_style) + def run_insecure(host, file_path, print_style): - with grpc.insecure_channel(host, options=(('grpc.ssl_target_name_override', host),)) as channel: + with grpc.insecure_channel(host, options=(("grpc.ssl_target_name_override", host),)) as channel: _stream_file(channel, file_path, print_style) + def run(host, client_cert, client_key, ca_cert, file_path, print_style): def create_secure_channel(host, client_cert, client_key, ca_cert): - with open(client_cert, 'rb') as f: + with open(client_cert, "rb") as f: client_cert_data = f.read() - with open(client_key, 'rb') as f: + with open(client_key, "rb") as f: client_key_data = f.read() - with open(ca_cert, 'rb') as f: + with open(ca_cert, "rb") as f: ca_cert_data = f.read() credentials = grpc.ssl_channel_credentials( - root_certificates=ca_cert_data, - private_key=client_key_data, - certificate_chain=client_cert_data + root_certificates=ca_cert_data, private_key=client_key_data, certificate_chain=client_cert_data ) return grpc.secure_channel(host, credentials) + with create_secure_channel(host, client_cert, client_key, ca_cert) as channel: _stream_file(channel, file_path, print_style) @@ -126,20 +137,22 @@ def create_secure_channel(host, client_cert, client_key, ca_cert): logging.basicConfig(level=logging.INFO, format=log_format) parser = argparse.ArgumentParser(description="aiXplain speech recognition streaming client.") - parser.add_argument('--print-style', default='log', choices=[PS_SUBTITLE, PS_LOG], help='The print style, either "log" or "subtitle"') - parser.add_argument('--addr', default='localhost:50051', help='the address to connect to (default "localhost:50051")') - parser.add_argument('--cacert', default='./client-crt/ca.crt', help='ca cert for mTLS (default "./client-crt/ca.crt")') - parser.add_argument('--cert', default='./client-crt/tls.crt', help='client cert for mTLS (default "./client-crt/tls.crt")') - parser.add_argument('--key', default='./client-crt/tls.key', help='client key for mTLS (default "./client-crt/tls.key")') - parser.add_argument('--insecure', action='store_true', help='use insecure connection (no mTLS)') - parser.add_argument('--file-path', default="resources/conv.wav", help='audio file to stream from') + parser.add_argument( + "--print-style", default="log", choices=[PS_SUBTITLE, PS_LOG], help='The print style, either "log" or "subtitle"' + ) + parser.add_argument("--addr", default="localhost:50051", help='the address to connect to (default "localhost:50051")') + parser.add_argument("--cacert", default="./client-crt/ca.crt", help='ca cert for mTLS (default "./client-crt/ca.crt")') + parser.add_argument("--cert", default="./client-crt/tls.crt", help='client cert for mTLS (default "./client-crt/tls.crt")') + parser.add_argument("--key", default="./client-crt/tls.key", help='client key for mTLS 
(default "./client-crt/tls.key")') + parser.add_argument("--insecure", action="store_true", help="use insecure connection (no mTLS)") + parser.add_argument("--file-path", default="resources/conv.wav", help="audio file to stream from") args = parser.parse_args() if args.print_style == PS_SUBTITLE: - logging.getLogger('').setLevel(logging.ERROR) + logging.getLogger("").setLevel(logging.ERROR) if args.insecure: run_insecure(args.addr, args.file_path, args.print_style) else: - run(args.addr, args.cert, args.key, args.cacert, args.file_path, args.print_style) + run(args.addr, args.cert, args.key, args.cacert, args.file_path, args.print_style) diff --git a/docs/streaming/make_audio_compatible.py b/docs/streaming/make_audio_compatible.py index 9d04c2d5..d9a43277 100644 --- a/docs/streaming/make_audio_compatible.py +++ b/docs/streaming/make_audio_compatible.py @@ -4,6 +4,7 @@ FRAME_RATE = 16000 + def create_compatible_audio(source_path, dest_path): """ Function to resample an audio file and change the number of channels if there are more than 1. @@ -13,22 +14,23 @@ def create_compatible_audio(source_path, dest_path): updated = False if sound_file.frame_rate != FRAME_RATE: # Resample the audio file - logging.info(f'Resampling {sound_file.frame_rate} -> {FRAME_RATE}') + logging.info(f"Resampling {sound_file.frame_rate} -> {FRAME_RATE}") sound_file = sound_file.set_frame_rate(FRAME_RATE) updated = True # If the audio file has more than one channel, convert it to mono if sound_file.channels > 1: - logging.info(f'Changing no. channels {sound_file.channels} -> 1') + logging.info(f"Changing no. channels {sound_file.channels} -> 1") sound_file = sound_file.set_channels(1) updated = True if updated: # Export the processed audio file sound_file.export(dest_path, format="wav") + if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Process some audio files.') - parser.add_argument('--source_path', required=True, help='Source path for the audio file') - parser.add_argument('--dest_path', required=True, help='Destination path for the processed audio file') + parser = argparse.ArgumentParser(description="Process some audio files.") + parser.add_argument("--source_path", required=True, help="Source path for the audio file") + parser.add_argument("--dest_path", required=True, help="Destination path for the processed audio file") args = parser.parse_args() diff --git a/tests/__init__.py b/tests/__init__.py index 8e82233f..552d61e3 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -18,4 +18,4 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
-""" \ No newline at end of file +""" diff --git a/tests/functional/benchmark/benchmark_functional_test.py b/tests/functional/benchmark/benchmark_functional_test.py index ef8b77b8..907f543a 100644 --- a/tests/functional/benchmark/benchmark_functional_test.py +++ b/tests/functional/benchmark/benchmark_functional_test.py @@ -35,6 +35,7 @@ def run_input_map(request): def module_input_map(request): return request.param + def is_job_finshed(benchmark_job): time_taken = 0 sleep_time = 15 @@ -52,15 +53,15 @@ def is_job_finshed(benchmark_job): break return False + def assert_correct_results(benchmark_job): df = benchmark_job.download_results_as_csv(return_dataframe=True) assert type(df) is pd.DataFrame, "Couldn't download CSV" - model_success_rate = (sum(df["Model_success"])*100)/len(df.index) - assert model_success_rate > 80 , f"Low model success rate ({model_success_rate})" + model_success_rate = (sum(df["Model_success"]) * 100) / len(df.index) + assert model_success_rate > 80, f"Low model success rate ({model_success_rate})" metric_name = "BLEU by sacrebleu" mean_score = df[metric_name].mean() - assert mean_score != 0 , f"Zero Mean Score - Please check metric ({metric_name})" - + assert mean_score != 0, f"Zero Mean Score - Please check metric ({metric_name})" def test_create_and_run(run_input_map): @@ -75,9 +76,6 @@ def test_create_and_run(run_input_map): assert_correct_results(benchmark_job) - - - # def test_module(module_input_map): # benchmark = BenchmarkFactory.get(module_input_map["benchmark_id"]) # assert benchmark.id == module_input_map["benchmark_id"] diff --git a/tests/functional/file_asset/file_create_test.py b/tests/functional/file_asset/file_create_test.py index adbf24b6..6d2a5739 100644 --- a/tests/functional/file_asset/file_create_test.py +++ b/tests/functional/file_asset/file_create_test.py @@ -20,22 +20,14 @@ from aixplain.enums import License from aixplain.factories import FileFactory + def test_file_create(): upload_file = "tests/functional/file_asset/input/test.csv" - s3_link = FileFactory.create( - local_path = upload_file, - tags = ['test1','test2'], - license = License.MIT, - is_temp = False - ) + s3_link = FileFactory.create(local_path=upload_file, tags=["test1", "test2"], license=License.MIT, is_temp=False) assert s3_link.startswith("s3") + def test_file_create_temp(): upload_file = "tests/functional/file_asset/input/test.csv" - s3_link = FileFactory.create( - local_path = upload_file, - tags = ['test1','test2'], - license = License.MIT, - is_temp = True - ) + s3_link = FileFactory.create(local_path=upload_file, tags=["test1", "test2"], license=License.MIT, is_temp=True) assert s3_link.startswith("s3") diff --git a/tests/functional/model/image_upload_e2e_test.py b/tests/functional/model/image_upload_e2e_test.py index 7c7efbcc..90ebddfd 100644 --- a/tests/functional/model/image_upload_e2e_test.py +++ b/tests/functional/model/image_upload_e2e_test.py @@ -38,7 +38,9 @@ def test_create_and_upload_model(): input_modality = mock_register_payload["input_modality"] output_modality = mock_register_payload["output_modality"] documentation_url = mock_register_payload["documentation_url"] - register_response = ModelFactory.create_asset_repo(name, description, function, source_language, input_modality, output_modality, documentation_url, config.TEAM_API_KEY) + register_response = ModelFactory.create_asset_repo( + name, description, function, source_language, input_modality, output_modality, documentation_url, config.TEAM_API_KEY + ) assert "id" in register_response.keys() assert 
"repositoryName" in register_response.keys() model_id = register_response["id"] diff --git a/tests/functional/model/image_upload_functional_test.py b/tests/functional/model/image_upload_functional_test.py index 60d1d3f0..cae7b731 100644 --- a/tests/functional/model/image_upload_functional_test.py +++ b/tests/functional/model/image_upload_functional_test.py @@ -7,6 +7,7 @@ import docker import pytest + def test_login(): response = ModelFactory.asset_repo_login() assert response["username"] == "AWS" @@ -27,7 +28,9 @@ def test_create_asset_repo(): input_modality = mock_register_payload["input_modality"] output_modality = mock_register_payload["output_modality"] documentation_url = mock_register_payload["documentation_url"] - response = ModelFactory.create_asset_repo(name, description, function, source_language, input_modality, output_modality, documentation_url, config.TEAM_API_KEY) + response = ModelFactory.create_asset_repo( + name, description, function, source_language, input_modality, output_modality, documentation_url, config.TEAM_API_KEY + ) response_dict = dict(response) assert "id" in response_dict.keys() assert "repositoryName" in response_dict.keys() diff --git a/tests/test_utils.py b/tests/test_utils.py index e7a41e16..264538d5 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -3,14 +3,16 @@ import logging from aixplain.utils import config + def delete_asset(model_id, api_key): delete_url = urljoin(config.BACKEND_URL, f"sdk/models/{model_id}") logging.debug(f"URL: {delete_url}") headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"} _ = _request_with_retry("delete", delete_url, headers=headers) + def delete_service_account(api_key): delete_url = urljoin(config.BACKEND_URL, f"sdk/ecr/logout") logging.debug(f"URL: {delete_url}") headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"} - _ = _request_with_retry("post", delete_url, headers=headers) \ No newline at end of file + _ = _request_with_retry("post", delete_url, headers=headers) diff --git a/tests/unit/finetune_test.py b/tests/unit/finetune_test.py index 13287c32..3691bd40 100644 --- a/tests/unit/finetune_test.py +++ b/tests/unit/finetune_test.py @@ -68,6 +68,7 @@ def test_create(): assert finetune.model.id == test_model assert finetune.cost.to_dict() == cost_estimation_map + def test_create_exception(): model_map = read_data(MODEL_FILE) with requests_mock.Mocker() as mock: @@ -109,14 +110,15 @@ def test_start(): assert fine_tuned_model is not None assert fine_tuned_model.id == model_map["id"] + @pytest.mark.parametrize( - "input_path,after_epoch,training_loss,validation_loss", + "input_path,after_epoch,training_loss,validation_loss", [ - (FINETUNE_STATUS_FILE, None, 0.4, 0.0217), + (FINETUNE_STATUS_FILE, None, 0.4, 0.0217), (FINETUNE_STATUS_FILE, 1, 0.2, 0.0482), - (FINETUNE_STATUS_FILE_2, None, 2.657801408034, 2.596168756485), - (FINETUNE_STATUS_FILE_2, 0, None, 2.684150457382) - ] + (FINETUNE_STATUS_FILE_2, None, 2.657801408034, 2.596168756485), + (FINETUNE_STATUS_FILE_2, 0, None, 2.684150457382), + ], ) def test_check_finetuner_status(input_path, after_epoch, training_loss, validation_loss): model_map = read_data(input_path) @@ -146,4 +148,4 @@ def test_list_finetunable_models(is_finetunable): model_list = result["results"] assert len(model_list) > 0 for model_index in range(len(model_list)): - assert model_list[model_index].id == list_map["items"][model_index]["id"] \ No newline at end of file + assert model_list[model_index].id == 
list_map["items"][model_index]["id"] diff --git a/tests/unit/image_upload_test.py b/tests/unit/image_upload_test.py index 4b192292..7cd43946 100644 --- a/tests/unit/image_upload_test.py +++ b/tests/unit/image_upload_test.py @@ -56,6 +56,7 @@ def test_list_host_machines(): for key in machine_dict.keys(): assert machine_dict[key] == mock_json_dict[key] + def test_get_functions(): url = urljoin(config.BACKEND_URL, f"sdk/functions") with requests_mock.Mocker() as mock: @@ -65,6 +66,7 @@ def test_get_functions(): functions = ModelFactory.list_functions(config.TEAM_API_KEY) assert functions == mock_json + @pytest.mark.skip(reason="Not currently supported.") def test_list_image_repo_tags(): model_id = "mock_id" diff --git a/tests/unit/llm_test.py b/tests/unit/llm_test.py index 54887950..1329e136 100644 --- a/tests/unit/llm_test.py +++ b/tests/unit/llm_test.py @@ -4,6 +4,8 @@ load_dotenv() from aixplain.utils import config +from aixplain.enums import ModelStatus +from aixplain.modules.model.response import ModelResponse from aixplain.modules import LLM import pytest @@ -49,3 +51,43 @@ def test_run_async_errors(status_code, error_message): response = test_llm.run_async(data="input_data") assert response["status"] == "FAILED" assert response["error_message"] == error_message + + +def test_run_sync(): + model_id = "test-model-id" + base_url = config.MODELS_RUN_URL + execute_url = f"{base_url}/{model_id}".replace("/api/v1/execute", "/api/v2/execute") + + ref_response = { + "status": "IN_PROGRESS", + "data": "https://models.aixplain.com/api/v1/data/a90c2078-edfe-403f-acba-d2d94cf71f42", + } + + poll_response = { + "completed": True, + "status": "SUCCESS", + "data": "Test Model Result", + "usedCredits": 0, + "runTime": 0, + } + + with requests_mock.Mocker() as mock: + mock.post(execute_url, json=ref_response) + + poll_url = ref_response["data"] + mock.get(poll_url, json=poll_response) + + test_model = LLM( + id=model_id, name="Test Model", function=Function.TEXT_GENERATION, url=base_url, api_key=config.TEAM_API_KEY + ) + + input_data = {"data": "input_data"} + response = test_model.run(data=input_data, temperature=0.001, max_tokens=128, top_p=1.0) + + assert isinstance(response, ModelResponse) + assert response.status == ModelStatus.SUCCESS + assert response.data == "Test Model Result" + assert response.completed is True + assert response.used_credits == 0 + assert response.run_time == 0 + assert response.usage is None diff --git a/tests/unit/model_test.py b/tests/unit/model_test.py index 03dccdbe..94e2f6c2 100644 --- a/tests/unit/model_test.py +++ b/tests/unit/model_test.py @@ -20,7 +20,6 @@ import requests_mock load_dotenv() -import re import json from aixplain.utils import config from aixplain.modules import Model @@ -28,8 +27,10 @@ from aixplain.factories import ModelFactory from aixplain.enums import Function from urllib.parse import urljoin - +from aixplain.enums import ModelStatus +from aixplain.modules.model.response import ModelResponse import pytest +from unittest.mock import patch def test_build_payload(): @@ -66,7 +67,7 @@ def test_call_run_endpoint_sync(): model_id = "model-id" execute_url = f"{base_url}/{model_id}".replace("/api/v1/execute", "/api/v2/execute") payload = {"data": "input_data"} - ref_response = {"completed": True, "status": "SUCCESS", "data": "Hello"} + ref_response = {"completed": True, "status": ModelStatus.SUCCESS, "data": "Hello"} with requests_mock.Mocker() as mock: mock.post(execute_url, json=ref_response) @@ -85,28 +86,26 @@ def test_success_poll(): mock.get(poll_url, 
headers=headers, json=ref_response) test_model = Model("", "") hyp_response = test_model.poll(poll_url=poll_url) + assert isinstance(hyp_response, ModelResponse) assert hyp_response["completed"] == ref_response["completed"] - assert hyp_response["status"] == "SUCCESS" + assert hyp_response["status"] == ModelStatus.SUCCESS def test_failed_poll(): with requests_mock.Mocker() as mock: poll_url = "https://models.aixplain.com/api/v1/data/a90c2078-edfe-403f-acba-d2d94cf71f42" headers = {"x-api-key": config.TEAM_API_KEY, "Content-Type": "application/json"} - ref_response = { - "completed": True, - "error": "err.supplier_error", - "supplierError": re.escape( - '{"error":{"message":"The model `` does not exist","type":"invalid_request_error","param":null,"code":"model_not_found"}}' - ), - } + ref_response = {"completed": True, "status": "FAILED", "error_message": "Some error occurred"} + + with requests_mock.Mocker() as mock: mock.get(poll_url, headers=headers, json=ref_response) - test_model = Model("", "") - hyp_response = test_model.poll(poll_url=poll_url) - assert hyp_response["completed"] == ref_response["completed"] - assert hyp_response["error"] == ref_response["error"] - assert hyp_response["supplierError"] == ref_response["supplierError"] - assert hyp_response["status"] == "FAILED" + model = Model(id="test-id", name="Test Model") + response = model.poll(poll_url=poll_url) + + assert isinstance(response, ModelResponse) + assert response.status == ModelStatus.FAILED + assert response.error_message == "Some error occurred" + assert response.completed is True @pytest.mark.parametrize( @@ -139,15 +138,14 @@ def test_run_async_errors(status_code, error_message): base_url = config.MODELS_RUN_URL model_id = "model-id" execute_url = f"{base_url}/{model_id}" - ref_response = { - "error": "An unspecified error occurred while processing your request.", - } + ref_response = "An unspecified error occurred while processing your request." 
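
The rewritten tests above and below lean on ModelResponse acting both as an object and as a mapping. A short sketch of that contract, with placeholder ID and URL; it mirrors the assertions in the tests rather than adding new behavior.

from aixplain.enums import ModelStatus
from aixplain.modules import Model

model = Model(id="<model-id>", name="Placeholder")
response = model.poll(poll_url="https://models.aixplain.com/api/v1/data/<request-id>")

# Attribute access and key access are interchangeable on ModelResponse.
assert response.status == response["status"]
if response.status == ModelStatus.FAILED:
    print(response.error_message)
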
with requests_mock.Mocker() as mock: mock.post(execute_url, status_code=status_code, json=ref_response) test_model = Model(id=model_id, name="Test Model", url=base_url) response = test_model.run_async(data="input_data") - assert response["status"] == "FAILED" + assert isinstance(response, ModelResponse) + assert response["status"] == ModelStatus.FAILED assert response["error_message"] == error_message @@ -189,3 +187,72 @@ def test_get_assets_from_page_error(): ) assert "Listing Models Error: Failed to retrieve models" in str(excinfo.value) + + +def test_run_sync(): + model_id = "test-model-id" + base_url = config.MODELS_RUN_URL + execute_url = f"{base_url}/{model_id}".replace("/api/v1/execute", "/api/v2/execute") + + ref_response = { + "status": "IN_PROGRESS", + "data": "https://models.aixplain.com/api/v1/data/a90c2078-edfe-403f-acba-d2d94cf71f42", + } + + poll_response = { + "completed": True, + "status": "SUCCESS", + "data": "Test Model Result", + "usedCredits": 0, + "runTime": 0, + } + + with requests_mock.Mocker() as mock: + mock.post(execute_url, json=ref_response) + + poll_url = ref_response["data"] + mock.get(poll_url, json=poll_response) + + test_model = Model(id=model_id, name="Test Model", url=base_url, api_key=config.TEAM_API_KEY) + + input_data = {"data": "input_data"} + response = test_model.run(data=input_data, name="test_run") + + assert isinstance(response, ModelResponse) + assert response.status == ModelStatus.SUCCESS + assert response.data == "Test Model Result" + assert response.completed is True + assert response.used_credits == 0 + assert response.run_time == 0 + assert response.usage is None + + +def test_sync_poll(): + poll_url = "https://models.aixplain.com/api/v1/data/mock-model-id/poll" + + in_progress_response = ModelResponse( + status="IN_PROGRESS", data="", completed=False, error_message="", used_credits=0, run_time=0, usage=None + ) + + success_response = ModelResponse( + status="SUCCESS", + data="Polling successful result", + details={"test": "test"}, + completed=True, + error_message="", + used_credits=0, + run_time=0, + usage=None, + ) + + model = Model(id="mock-model-id", name="Mock Model") + + with patch.object(model, "poll", side_effect=[in_progress_response, in_progress_response, success_response]): + + response = model.sync_poll(poll_url=poll_url, name="test_poll", timeout=5) + + assert isinstance(response, ModelResponse) + assert response["status"] == "SUCCESS" + assert response["completed"] is True + assert response["details"] == {"test": "test"} + assert response["data"] == "Polling successful result" From d1538af4d45191aed0602b9c4d4bc1f7c77d2040 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Mon, 4 Nov 2024 10:10:55 -0300 Subject: [PATCH 063/105] Set Model Tool description (#292) --- aixplain/factories/agent_factory/__init__.py | 3 ++- aixplain/factories/agent_factory/utils.py | 1 + aixplain/modules/agent/tool/model_tool.py | 4 +++- tests/unit/agent_test.py | 6 +++++- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/aixplain/factories/agent_factory/__init__.py b/aixplain/factories/agent_factory/__init__.py index 2a16e191..3e66884e 100644 --- a/aixplain/factories/agent_factory/__init__.py +++ b/aixplain/factories/agent_factory/__init__.py @@ -141,6 +141,7 @@ def create_model_tool( model: Optional[Union[Model, Text]] = None, function: Optional[Union[Function, Text]] = None, supplier: Optional[Union[Supplier, Text]] = None, + description: Text = "", ) -> ModelTool: 
"""Create a new model tool.""" if function is not None and isinstance(function, str): @@ -154,7 +155,7 @@ def create_model_tool( break if isinstance(supplier, str): supplier = None - return ModelTool(function=function, supplier=supplier, model=model) + return ModelTool(function=function, supplier=supplier, model=model, description=description) @classmethod def create_pipeline_tool(cls, description: Text, pipeline: Union[Pipeline, Text]) -> PipelineTool: diff --git a/aixplain/factories/agent_factory/utils.py b/aixplain/factories/agent_factory/utils.py index d86982ef..9192f1d4 100644 --- a/aixplain/factories/agent_factory/utils.py +++ b/aixplain/factories/agent_factory/utils.py @@ -28,6 +28,7 @@ def build_agent(payload: Dict, api_key: Text = config.TEAM_API_KEY) -> Agent: supplier=tool["supplier"], version=tool["version"], model=tool["assetId"], + description=tool.get("description", ""), ) elif tool["type"] == "pipeline": tool = PipelineTool(description=tool["description"], pipeline=tool["assetId"]) diff --git a/aixplain/modules/agent/tool/model_tool.py b/aixplain/modules/agent/tool/model_tool.py index 3a84c45b..404ed8d7 100644 --- a/aixplain/modules/agent/tool/model_tool.py +++ b/aixplain/modules/agent/tool/model_tool.py @@ -42,6 +42,7 @@ def __init__( function: Optional[Union[Function, Text]] = None, supplier: Optional[Union[Dict, Supplier]] = None, model: Optional[Union[Text, Model]] = None, + description: Text = "", **additional_info, ) -> None: """Specialized software or resource designed to assist the AI in executing specific tasks or functions based on user commands. @@ -50,11 +51,12 @@ def __init__( function (Optional[Union[Function, Text]]): task that the tool performs. Defaults to None. supplier (Optional[Union[Dict, Supplier]]): Preferred supplier to perform the task. Defaults to None. Defaults to None. model (Optional[Union[Text, Model]]): Model function. Defaults to None. + description (Text): Description of the tool. Defaults to "". """ assert ( function is not None or model is not None ), "Agent Creation Error: Either function or model must be provided when instantiating a tool." 
- super().__init__("", "", **additional_info) + super().__init__(name="", description=description, **additional_info) if function is not None: if isinstance(function, str): function = Function(function) diff --git a/tests/unit/agent_test.py b/tests/unit/agent_test.py index 1be0682e..43d0d0a2 100644 --- a/tests/unit/agent_test.py +++ b/tests/unit/agent_test.py @@ -112,6 +112,7 @@ def test_create_agent(): "version": "1.0", "assetId": "6646261c6eb563165658bbb1", "function": "text-generation", + "description": "Test Tool", } ], } @@ -134,10 +135,13 @@ def test_create_agent(): name="Test Agent", description="Test Agent Description", llm_id="6646261c6eb563165658bbb1", - tools=[AgentFactory.create_model_tool(supplier=Supplier.OPENAI, function="text-generation")], + tools=[ + AgentFactory.create_model_tool(supplier=Supplier.OPENAI, function="text-generation", description="Test Tool") + ], ) assert agent.name == ref_response["name"] assert agent.description == ref_response["description"] assert agent.llm_id == ref_response["llmId"] assert agent.tools[0].function.value == ref_response["assets"][0]["function"] + assert agent.tools[0].description == ref_response["assets"][0]["description"] From d731ff0207c89dade1a371c13a99937930f9b3ba Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Mon, 4 Nov 2024 11:51:10 -0300 Subject: [PATCH 064/105] Eng 812 update agents (#285) * Update method for agents and teams * Agent Functional Test * Team Agent Function tests * Team Agent Unit Tests * Improvements in tests and listing agents * Refactoring listing teams method * Add contain field in model response --- aixplain/enums/asset_status.py | 1 + aixplain/factories/agent_factory/__init__.py | 147 ++++++++---------- aixplain/factories/agent_factory/utils.py | 52 +++---- .../factories/team_agent_factory/__init__.py | 147 ++++++++++-------- .../factories/team_agent_factory/utils.py | 26 ++-- aixplain/modules/agent/__init__.py | 68 +++++++- aixplain/modules/agent/tool.py | 59 ------- aixplain/modules/agent/tool/__init__.py | 7 + aixplain/modules/agent/tool/model_tool.py | 20 +++ aixplain/modules/agent/tool/pipeline_tool.py | 7 + aixplain/modules/model/response.py | 7 + aixplain/modules/team_agent/__init__.py | 67 ++++++++ .../functional/agent/agent_functional_test.py | 50 +++++- .../team_agent/team_agent_functional_test.py | 58 ++++++- tests/unit/agent_test.py | 85 +++++++++- tests/unit/team_agent_test.py | 125 ++++++++++++++- 16 files changed, 657 insertions(+), 269 deletions(-) delete mode 100644 aixplain/modules/agent/tool.py diff --git a/aixplain/enums/asset_status.py b/aixplain/enums/asset_status.py index 9274001f..994212fb 100644 --- a/aixplain/enums/asset_status.py +++ b/aixplain/enums/asset_status.py @@ -26,6 +26,7 @@ class AssetStatus(Text, Enum): + DRAFT = "draft" HIDDEN = "hidden" SCHEDULED = "scheduled" ONBOARDING = "onboarding" diff --git a/aixplain/factories/agent_factory/__init__.py b/aixplain/factories/agent_factory/__init__.py index 3e66884e..c56d1fd8 100644 --- a/aixplain/factories/agent_factory/__init__.py +++ b/aixplain/factories/agent_factory/__init__.py @@ -34,7 +34,6 @@ from aixplain.utils import config from typing import Dict, List, Optional, Text, Union -from aixplain.factories.agent_factory.utils import build_agent, validate_llm, validate_name from aixplain.utils.file_utils import _request_with_retry from urllib.parse import urljoin @@ -65,74 +64,49 @@ def create( Returns: Agent: created Agent """ - validate_name(name) - # validate 
LLM ID - validate_llm(llm_id) + from aixplain.factories.agent_factory.utils import build_agent + agent = None + url = urljoin(config.BACKEND_URL, "sdk/agents") + headers = {"x-api-key": api_key} + + if isinstance(supplier, dict): + supplier = supplier["code"] + elif isinstance(supplier, Supplier): + supplier = supplier.value["code"] + + payload = { + "name": name, + "assets": [tool.to_dict() for tool in tools], + "description": description, + "supplier": supplier, + "version": version, + "llmId": llm_id, + "status": "draft", + } + agent = build_agent(payload=payload, api_key=api_key) + agent.validate() + response = "Unspecified error" try: - agent = None - url = urljoin(config.BACKEND_URL, "sdk/agents") - headers = {"x-api-key": api_key} - - if isinstance(supplier, dict): - supplier = supplier["code"] - elif isinstance(supplier, Supplier): - supplier = supplier.value["code"] - - tool_payload = [] - for tool in tools: - if isinstance(tool, ModelTool): - tool.validate() - tool_payload.append( - { - "function": tool.function.value if tool.function is not None else None, - "type": "model", - "description": tool.description, - "supplier": tool.supplier.value["code"] if tool.supplier else None, - "version": tool.version if tool.version else None, - "assetId": tool.model, - } - ) - elif isinstance(tool, PipelineTool): - tool.validate() - tool_payload.append( - { - "assetId": tool.pipeline, - "description": tool.description, - "type": "pipeline", - } - ) - else: - raise Exception("Agent Creation Error: Tool type not supported.") - - payload = { - "name": name, - "assets": tool_payload, - "description": description, - "supplier": supplier, - "version": version, - "llmId": llm_id, - } - - logging.info(f"Start service for POST Create Agent - {url} - {headers} - {json.dumps(payload)}") + logging.debug(f"Start service for POST Create Agent - {url} - {headers} - {json.dumps(payload)}") r = _request_with_retry("post", url, headers=headers, json=payload) - if 200 <= r.status_code < 300: - response = r.json() - agent = build_agent(payload=response, api_key=api_key) - else: - error = r.json() - error_msg = "Agent Onboarding Error: Please contact the administrators." - if "message" in error: - msg = error["message"] - if error["message"] == "err.name_already_exists": - msg = "Agent name already exists." - elif error["message"] == "err.asset_is_not_available": - msg = "Some tools are not available." - error_msg = f"Agent Onboarding Error (HTTP {r.status_code}): {msg}" - logging.exception(error_msg) - raise Exception(error_msg) - except Exception as e: - raise Exception(e) + response = r.json() + except Exception: + raise Exception("Agent Onboarding Error: Please contact the administrators.") + + if 200 <= r.status_code < 300: + agent = build_agent(payload=response, api_key=api_key) + else: + error_msg = f"Agent Onboarding Error: {response}" + if "message" in response: + msg = response["message"] + if response["message"] == "err.name_already_exists": + msg = "Agent name already exists." + elif response["message"] == "err.asset_is_not_available": + msg = "Some tools are not available." 
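
From the caller's side, the refactored flow above builds and validates the agent locally before the POST, and the payload is sent with "status": "draft". A sketch; the name, description and tool are illustrative.

from aixplain.factories import AgentFactory

agent = AgentFactory.create(
    name="Copywriting Agent",
    description="Generates and reviews short-form copy",
    tools=[AgentFactory.create_model_tool(function="text-generation")],
)
print(agent.id, agent.status)  # the agent starts life as a draft
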
+ error_msg = f"Agent Onboarding Error (HTTP {r.status_code}): {msg}" + logging.exception(error_msg) + raise Exception(error_msg) return agent @classmethod @@ -165,37 +139,42 @@ def create_pipeline_tool(cls, description: Text, pipeline: Union[Pipeline, Text] @classmethod def list(cls) -> Dict: """List all agents available in the platform.""" + from aixplain.factories.agent_factory.utils import build_agent + url = urljoin(config.BACKEND_URL, "sdk/agents") headers = {"x-api-key": config.TEAM_API_KEY, "Content-Type": "application/json"} + resp = {} payload = {} logging.info(f"Start service for GET List Agents - {url} - {headers} - {json.dumps(payload)}") try: r = _request_with_retry("get", url, headers=headers) resp = r.json() + except Exception: + raise Exception("Agent Listing Error: Please contact the administrators.") - if 200 <= r.status_code < 300: - agents, page_total, total = [], 0, 0 - results = resp - page_total = len(results) - total = len(results) - logging.info(f"Response for GET List Agents - Page Total: {page_total} / Total: {total}") - for agent in results: - agents.append(build_agent(agent)) - return {"results": agents, "page_total": page_total, "page_number": 0, "total": total} - else: - error_msg = "Agent Listing Error: Please contact the administrators." - if "message" in resp: - msg = resp["message"] - error_msg = f"Agent Listing Error (HTTP {r.status_code}): {msg}" - logging.exception(error_msg) - raise Exception(error_msg) - except Exception as e: - raise Exception(e) + if 200 <= r.status_code < 300: + agents, page_total, total = [], 0, 0 + results = resp + page_total = len(results) + total = len(results) + logging.info(f"Response for GET List Agents - Page Total: {page_total} / Total: {total}") + for agent in results: + agents.append(build_agent(agent)) + return {"results": agents, "page_total": page_total, "page_number": 0, "total": total} + else: + error_msg = "Agent Listing Error: Please contact the administrators." 
+ if isinstance(resp, dict) and "message" in resp: + msg = resp["message"] + error_msg = f"Agent Listing Error (HTTP {r.status_code}): {msg}" + logging.exception(error_msg) + raise Exception(error_msg) @classmethod def get(cls, agent_id: Text, api_key: Optional[Text] = None) -> Agent: """Get agent by id.""" + from aixplain.factories.agent_factory.utils import build_agent + url = urljoin(config.BACKEND_URL, f"sdk/agents/{agent_id}") if config.AIXPLAIN_API_KEY != "": headers = {"x-aixplain-key": f"{config.AIXPLAIN_API_KEY}", "Content-Type": "application/json"} diff --git a/aixplain/factories/agent_factory/utils.py b/aixplain/factories/agent_factory/utils.py index 9192f1d4..e5e73dc4 100644 --- a/aixplain/factories/agent_factory/utils.py +++ b/aixplain/factories/agent_factory/utils.py @@ -12,20 +12,22 @@ def build_agent(payload: Dict, api_key: Text = config.TEAM_API_KEY) -> Agent: """Instantiate a new agent in the platform.""" - tools = payload["assets"] - for i, tool in enumerate(tools): + tools_dict = payload["assets"] + tools = [] + for tool in tools_dict: if tool["type"] == "model": - for supplier in Supplier: + supplier = "aixplain" + for supplier_ in Supplier: if tool["supplier"] is not None and tool["supplier"].lower() in [ - supplier.value["code"].lower(), - supplier.value["name"].lower(), + supplier_.value["code"].lower(), + supplier_.value["name"].lower(), ]: - tool["supplier"] = supplier + supplier = supplier_ break tool = ModelTool( - function=Function(tool["function"]) if tool["function"] is not None else None, - supplier=tool["supplier"], + function=Function(tool.get("function", None)), + supplier=supplier, version=tool["version"], model=tool["assetId"], description=tool.get("description", ""), @@ -34,37 +36,19 @@ def build_agent(payload: Dict, api_key: Text = config.TEAM_API_KEY) -> Agent: tool = PipelineTool(description=tool["description"], pipeline=tool["assetId"]) else: raise Exception("Agent Creation Error: Tool type not supported.") - tools[i] = tool + tools.append(tool) agent = Agent( - id=payload["id"], - name=payload["name"] if "name" in payload else "", + id=payload["id"] if "id" in payload else "", + name=payload.get("name", ""), tools=tools, - description=payload["description"] if "description" in payload else "", - supplier=payload["teamId"] if "teamId" in payload else None, - version=payload["version"] if "version" in payload else None, - cost=payload["cost"] if "cost" in payload else None, - llm_id=payload["llmId"] if "llmId" in payload else GPT_4o_ID, + description=payload.get("description", ""), + supplier=payload.get("teamId", None), + version=payload.get("version", None), + cost=payload.get("cost", None), + llm_id=payload.get("llmId", GPT_4o_ID), api_key=api_key, status=AssetStatus(payload["status"]), ) agent.url = urljoin(config.BACKEND_URL, f"sdk/agents/{agent.id}/run") return agent - - -def validate_llm(model_id: Text) -> None: - from aixplain.factories.model_factory import ModelFactory - - try: - llm = ModelFactory.get(model_id) - assert llm.function == Function.TEXT_GENERATION, "Large Language Model must be a text generation model." - except Exception: - raise Exception(f"Large Language Model with ID '{model_id}' not found.") - - -def validate_name(name: Text) -> None: - import re - - assert ( - re.match("^[a-zA-Z0-9 ]*$", name) is not None - ), "Agent Creation Error: Agent name must not contain special characters." 
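
A short usage sketch of the refactored listing and retrieval paths above: list() keeps returning a dict with "results", "page_total", "page_number" and "total", and get() still returns a single Agent. Assumes at least one agent exists on the account.

from aixplain.factories import AgentFactory

page = AgentFactory.list()
print(f'{page["total"]} agents')
for agent in page["results"]:
    print(agent.id, agent.name, agent.status)

first = AgentFactory.get(page["results"][0].id)  # round-trip by ID
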
diff --git a/aixplain/factories/team_agent_factory/__init__.py b/aixplain/factories/team_agent_factory/__init__.py index 72d47c03..3f65b4b0 100644 --- a/aixplain/factories/team_agent_factory/__init__.py +++ b/aixplain/factories/team_agent_factory/__init__.py @@ -25,8 +25,6 @@ import logging from aixplain.enums.supplier import Supplier -from aixplain.factories.agent_factory import AgentFactory -from aixplain.factories.agent_factory.utils import validate_llm, validate_name from aixplain.modules.agent import Agent from aixplain.modules.team_agent import TeamAgent from aixplain.utils import config @@ -50,67 +48,73 @@ def create( use_mentalist_and_inspector: bool = True, ) -> TeamAgent: """Create a new team agent in the platform.""" - validate_name(name) - # validate LLM ID - validate_llm(llm_id) assert len(agents) > 0, "TeamAgent Onboarding Error: At least one agent must be provided." for agent in agents: if isinstance(agent, Text) is True: try: + from aixplain.factories.agent_factory import AgentFactory + agent = AgentFactory.get(agent) except Exception: raise Exception(f"TeamAgent Onboarding Error: Agent {agent} does not exist.") else: + from aixplain.modules.agent import Agent + assert isinstance(agent, Agent), "TeamAgent Onboarding Error: Agents must be instances of Agent class" mentalist_and_inspector_llm_id = None if use_mentalist_and_inspector is True: mentalist_and_inspector_llm_id = llm_id + + team_agent = None + url = urljoin(config.BACKEND_URL, "sdk/agent-communities") + headers = {"x-api-key": api_key} + + if isinstance(supplier, dict): + supplier = supplier["code"] + elif isinstance(supplier, Supplier): + supplier = supplier.value["code"] + + agent_list = [] + for idx, agent in enumerate(agents): + agent_list.append({"assetId": agent.id, "number": idx, "type": "AGENT", "label": "AGENT"}) + + payload = { + "name": name, + "agents": agent_list, + "links": [], + "description": description, + "llmId": llm_id, + "supervisorId": llm_id, + "plannerId": mentalist_and_inspector_llm_id, + "supplier": supplier, + "version": version, + "status": "draft", + } + + team_agent = build_team_agent(payload=payload, api_key=api_key) + team_agent.validate() + response = "Unspecified error" try: - team_agent = None - url = urljoin(config.BACKEND_URL, "sdk/agent-communities") - headers = {"x-api-key": api_key} - - if isinstance(supplier, dict): - supplier = supplier["code"] - elif isinstance(supplier, Supplier): - supplier = supplier.value["code"] - - agent_list = [] - for idx, agent in enumerate(agents): - agent_list.append({"assetId": agent.id, "number": idx, "type": "AGENT", "label": "AGENT"}) - - payload = { - "name": name, - "agents": agent_list, - "links": [], - "description": description, - "llmId": llm_id, - "supervisorId": llm_id, - "plannerId": mentalist_and_inspector_llm_id, - "supplier": supplier, - "version": version, - } - - logging.info(f"Start service for POST Create TeamAgent - {url} - {headers} - {json.dumps(payload)}") + logging.debug(f"Start service for POST Create TeamAgent - {url} - {headers} - {json.dumps(payload)}") r = _request_with_retry("post", url, headers=headers, json=payload) - if 200 <= r.status_code < 300: - response = r.json() - team_agent = build_team_agent(payload=response, api_key=api_key) - else: - error = r.json() - error_msg = "TeamAgent Onboarding Error: Please contact the administrators." - if "message" in error: - msg = error["message"] - if error["message"] == "err.name_already_exists": - msg = "TeamAgent name already exists." 
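
The team-level create() mirrors the agent flow: build locally, validate, then POST as a draft. A sketch, where search_agent and writer_agent stand in for existing Agent objects; plain agent ID strings are accepted as well.

from aixplain.factories import TeamAgentFactory

team = TeamAgentFactory.create(
    name="Research Team",
    agents=[search_agent, writer_agent],  # Agent objects or agent ID strings
    description="Coordinates a searcher and a writer",
    use_mentalist_and_inspector=True,     # reuses llm_id as the planner LLM
)
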
- elif error["message"] == "err.asset_is_not_available": - msg = "Some tools are not available." - error_msg = f"TeamAgent Onboarding Error (HTTP {r.status_code}): {msg}" - logging.exception(error_msg) - raise Exception(error_msg) + response = r.json() except Exception as e: raise Exception(e) + + if 200 <= r.status_code < 300: + team_agent = build_team_agent(payload=response, api_key=api_key) + else: + error_msg = f"{response}" + if "message" in response: + msg = response["message"] + if response["message"] == "err.name_already_exists": + msg = "TeamAgent name already exists." + elif response["message"] == "err.asset_is_not_available": + msg = "Some tools are not available." + error_msg = f"TeamAgent Onboarding Error (HTTP {r.status_code}): {msg}" + logging.exception(error_msg) + raise Exception(error_msg) return team_agent @classmethod @@ -119,33 +123,34 @@ def list(cls) -> Dict: url = urljoin(config.BACKEND_URL, "sdk/agent-communities") headers = {"x-api-key": config.TEAM_API_KEY, "Content-Type": "application/json"} + resp = {} payload = {} logging.info(f"Start service for GET List Agents - {url} - {headers} - {json.dumps(payload)}") try: r = _request_with_retry("get", url, headers=headers) resp = r.json() + except Exception: + raise Exception("Team Agent Listing Error: Please contact the administrators.") - if 200 <= r.status_code < 300: - agents, page_total, total = [], 0, 0 - results = resp - page_total = len(results) - total = len(results) - logging.info(f"Response for GET List Agents - Page Total: {page_total} / Total: {total}") - for agent in results: - agents.append(build_team_agent(agent)) - return {"results": agents, "page_total": page_total, "page_number": 0, "total": total} - else: - error_msg = "Agent Listing Error: Please contact the administrators." - if "message" in resp: - msg = resp["message"] - error_msg = f"Agent Listing Error (HTTP {r.status_code}): {msg}" - logging.exception(error_msg) - raise Exception(error_msg) - except Exception as e: - raise Exception(e) + if 200 <= r.status_code < 300: + agents, page_total, total = [], 0, 0 + results = resp + page_total = len(results) + total = len(results) + logging.info(f"Response for GET List Agents - Page Total: {page_total} / Total: {total}") + for agent in results: + agents.append(build_team_agent(agent)) + return {"results": agents, "page_total": page_total, "page_number": 0, "total": total} + else: + error_msg = "Agent Listing Error: Please contact the administrators." 
+ if isinstance(resp, dict) and "message" in resp: + msg = resp["message"] + error_msg = f"Agent Listing Error (HTTP {r.status_code}): {msg}" + logging.exception(error_msg) + raise Exception(error_msg) @classmethod - def get(cls, agent_id: Text, api_key: Optional[Text] = None) -> Agent: + def get(cls, agent_id: Text, api_key: Optional[Text] = None) -> TeamAgent: """Get agent by id.""" url = urljoin(config.BACKEND_URL, f"sdk/agent-communities/{agent_id}") if config.AIXPLAIN_API_KEY != "": @@ -153,14 +158,18 @@ def get(cls, agent_id: Text, api_key: Optional[Text] = None) -> Agent: else: api_key = api_key if api_key is not None else config.TEAM_API_KEY headers = {"x-api-key": api_key, "Content-Type": "application/json"} - logging.info(f"Start service for GET Agent - {url} - {headers}") - r = _request_with_retry("get", url, headers=headers) - resp = r.json() + logging.info(f"Start service for GET Team Agent - {url} - {headers}") + try: + r = _request_with_retry("get", url, headers=headers) + resp = r.json() + except Exception: + raise Exception("Team Agent Get Error: Please contact the administrators.") + if 200 <= r.status_code < 300: return build_team_agent(resp) else: msg = "Please contact the administrators." if "message" in resp: msg = resp["message"] - error_msg = f"Agent Get Error (HTTP {r.status_code}): {msg}" + error_msg = f"Team Agent Get Error (HTTP {r.status_code}): {msg}" raise Exception(error_msg) diff --git a/aixplain/factories/team_agent_factory/utils.py b/aixplain/factories/team_agent_factory/utils.py index 42fa5f6c..da859a43 100644 --- a/aixplain/factories/team_agent_factory/utils.py +++ b/aixplain/factories/team_agent_factory/utils.py @@ -3,7 +3,6 @@ import aixplain.utils.config as config from aixplain.enums.asset_status import AssetStatus from aixplain.modules.team_agent import TeamAgent -from aixplain.factories.agent_factory import AgentFactory from typing import Dict, Text from urllib.parse import urljoin @@ -12,21 +11,24 @@ def build_team_agent(payload: Dict, api_key: Text = config.TEAM_API_KEY) -> TeamAgent: """Instantiate a new team agent in the platform.""" - agents = payload["agents"] - for i, agent in enumerate(agents): + from aixplain.factories.agent_factory import AgentFactory + + agents_dict = payload["agents"] + agents = [] + for i, agent in enumerate(agents_dict): agent = AgentFactory.get(agent["assetId"]) - agents[i] = agent + agents.append(agent) team_agent = TeamAgent( - id=payload["id"], - name=payload["name"] if "name" in payload else "", + id=payload.get("id", ""), + name=payload.get("name", ""), agents=agents, - description=payload["description"] if "description" in payload else "", - supplier=payload["teamId"] if "teamId" in payload else None, - version=payload["version"] if "version" in payload else None, - cost=payload["cost"] if "cost" in payload else None, - llm_id=payload["llmId"] if "llmId" in payload else GPT_4o_ID, - use_mentalist_and_inspector=True if "plannerId" in payload and payload["plannerId"] is not None else False, + description=payload.get("description", ""), + supplier=payload.get("teamId", None), + version=payload.get("version", None), + cost=payload.get("cost", None), + llm_id=payload.get("llmId", GPT_4o_ID), + use_mentalist_and_inspector=True if payload["plannerId"] is not None else False, api_key=api_key, status=AssetStatus(payload["status"]), ) diff --git a/aixplain/modules/agent/__init__.py b/aixplain/modules/agent/__init__.py index 3f892723..8d7391af 100644 --- a/aixplain/modules/agent/__init__.py +++ 
b/aixplain/modules/agent/__init__.py @@ -22,10 +22,12 @@ """ import json import logging +import re import time import traceback from aixplain.utils.file_utils import _request_with_retry +from aixplain.enums.function import Function from aixplain.enums.supplier import Supplier from aixplain.enums.asset_status import AssetStatus from aixplain.enums.storage_type import StorageType @@ -66,7 +68,7 @@ def __init__( supplier: Union[Dict, Text, Supplier, int] = "aiXplain", version: Optional[Text] = None, cost: Optional[Dict] = None, - status: AssetStatus = AssetStatus.ONBOARDING, + status: AssetStatus = AssetStatus.DRAFT, **additional_info, ) -> None: """Create an Agent with the necessary information. @@ -91,9 +93,27 @@ def __init__( try: status = AssetStatus(status) except Exception: - status = AssetStatus.ONBOARDING + status = AssetStatus.DRAFT self.status = status + def validate(self) -> None: + """Validate the Agent.""" + from aixplain.factories.model_factory import ModelFactory + + # validate name + assert ( + re.match("^[a-zA-Z0-9 ]*$", self.name) is not None + ), "Agent Creation Error: Agent name must not contain special characters." + + try: + llm = ModelFactory.get(self.llm_id) + assert llm.function == Function.TEXT_GENERATION, "Large Language Model must be a text generation model." + except Exception: + raise Exception(f"Large Language Model with ID '{self.llm_id}' not found.") + + for tool in self.tools: + tool.validate() + def run( self, data: Optional[Union[Dict, Text]] = None, @@ -242,6 +262,18 @@ def run_async( response["error"] = msg return response + def to_dict(self) -> Dict: + return { + "id": self.id, + "name": self.name, + "assets": [tool.to_dict() for tool in self.tools], + "description": self.description, + "supplier": self.supplier.value["code"] if isinstance(self.supplier, Supplier) else self.supplier, + "version": self.version, + "llmId": self.llm_id, + "status": self.status.value, + } + def delete(self) -> None: """Delete Agent service""" try: @@ -259,4 +291,34 @@ def delete(self) -> None: except ValueError: message = f"Agent Deletion Error (HTTP {r.status_code}): There was an error in deleting the agent." logging.error(message) - raise Exception(message) + raise Exception(f"{message}") + + def update(self) -> None: + """Update agent.""" + from aixplain.factories.agent_factory.utils import build_agent + + self.validate() + url = urljoin(config.BACKEND_URL, f"sdk/agents/{self.id}") + headers = {"x-api-key": config.TEAM_API_KEY, "Content-Type": "application/json"} + + payload = self.to_dict() + + logging.debug(f"Start service for PUT Update Agent - {url} - {headers} - {json.dumps(payload)}") + resp = "No specified error." + try: + r = _request_with_retry("put", url, headers=headers, json=payload) + resp = r.json() + except Exception: + raise Exception("Agent Update Error: Please contact the administrators.") + + if 200 <= r.status_code < 300: + return build_agent(resp) + else: + error_msg = f"Agent Update Error (HTTP {r.status_code}): {resp}" + raise Exception(error_msg) + + def deploy(self) -> None: + assert self.status == AssetStatus.DRAFT, "Agent must be in draft status to be deployed." + assert self.status != AssetStatus.ONBOARDED, "Agent is already deployed." 
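+        # mark the draft as onboarded locally, then persist the new status through a regular update call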
+ self.status = AssetStatus.ONBOARDED + self.update() diff --git a/aixplain/modules/agent/tool.py b/aixplain/modules/agent/tool.py deleted file mode 100644 index 6651afe7..00000000 --- a/aixplain/modules/agent/tool.py +++ /dev/null @@ -1,59 +0,0 @@ -__author__ = "aiXplain" - -""" -Copyright 2024 The aiXplain SDK authors - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Author: Lucas Pavanelli and Thiago Castro Ferreira -Date: May 16th 2024 -Description: - Agentification Class -""" -from typing import Text, Optional - -from aixplain.enums.function import Function -from aixplain.enums.supplier import Supplier - - -class Tool: - """Specialized software or resource designed to assist the AI in executing specific tasks or functions based on user commands. - - Attributes: - name (Text): name of the tool - description (Text): descriptiion of the tool - function (Function): task that the tool performs - supplier (Optional[Union[Dict, Text, Supplier, int]], optional): Preferred supplier to perform the task. Defaults to None. - """ - - def __init__( - self, - name: Text, - description: Text, - function: Function, - supplier: Optional[Supplier] = None, - **additional_info, - ) -> None: - """Specialized software or resource designed to assist the AI in executing specific tasks or functions based on user commands. - - Args: - name (Text): name of the tool - description (Text): descriptiion of the tool - function (Function): task that the tool performs - supplier (Optional[Union[Dict, Text, Supplier, int]], optional): Preferred supplier to perform the task. Defaults to None. 
- """ - self.name = name - self.description = description - self.function = function - self.supplier = supplier - self.additional_info = additional_info diff --git a/aixplain/modules/agent/tool/__init__.py b/aixplain/modules/agent/tool/__init__.py index 9c7a7a09..01b44dfa 100644 --- a/aixplain/modules/agent/tool/__init__.py +++ b/aixplain/modules/agent/tool/__init__.py @@ -51,3 +51,10 @@ def __init__( self.description = description self.version = version self.additional_info = additional_info + + def to_dict(self): + """Converts the tool to a dictionary.""" + raise NotImplementedError + + def validate(self): + raise NotImplementedError diff --git a/aixplain/modules/agent/tool/model_tool.py b/aixplain/modules/agent/tool/model_tool.py index 404ed8d7..628377a3 100644 --- a/aixplain/modules/agent/tool/model_tool.py +++ b/aixplain/modules/agent/tool/model_tool.py @@ -79,6 +79,26 @@ def __init__( self.model = model self.function = function + def to_dict(self) -> Dict: + """Converts the tool to a dictionary.""" + supplier = self.supplier + if supplier is not None: + if isinstance(supplier, dict): + supplier = supplier["code"] + elif isinstance(supplier, Supplier): + supplier = supplier.value["code"] + else: + supplier = str(supplier) + + return { + "function": self.function.value if self.function is not None else None, + "type": "model", + "description": self.description, + "supplier": supplier, + "version": self.version if self.version else None, + "assetId": self.model, + } + def validate(self) -> Model: from aixplain.factories.model_factory import ModelFactory diff --git a/aixplain/modules/agent/tool/pipeline_tool.py b/aixplain/modules/agent/tool/pipeline_tool.py index fa8394ea..9ea7a5fb 100644 --- a/aixplain/modules/agent/tool/pipeline_tool.py +++ b/aixplain/modules/agent/tool/pipeline_tool.py @@ -51,6 +51,13 @@ def __init__( pipeline = pipeline.id self.pipeline = pipeline + def to_dict(self): + return { + "assetId": self.pipeline, + "description": self.description, + "type": "pipeline", + } + def validate(self): from aixplain.factories.pipeline_factory import PipelineFactory diff --git a/aixplain/modules/model/response.py b/aixplain/modules/model/response.py index 902b9987..42ed09a4 100644 --- a/aixplain/modules/model/response.py +++ b/aixplain/modules/model/response.py @@ -68,3 +68,10 @@ def __repr__(self) -> str: if self.additional_fields: fields.extend([f"{k}={repr(v)}" for k, v in self.additional_fields.items()]) return f"ModelResponse({', '.join(fields)})" + + def __contains__(self, key: Text) -> bool: + try: + self[key] + return True + except KeyError: + return False diff --git a/aixplain/modules/team_agent/__init__.py b/aixplain/modules/team_agent/__init__.py index 86321489..2f8b5c3b 100644 --- a/aixplain/modules/team_agent/__init__.py +++ b/aixplain/modules/team_agent/__init__.py @@ -25,8 +25,10 @@ import logging import time import traceback +import re from aixplain.utils.file_utils import _request_with_retry +from aixplain.enums.function import Function from aixplain.enums.supplier import Supplier from aixplain.enums.asset_status import AssetStatus from aixplain.enums.storage_type import StorageType @@ -259,3 +261,68 @@ def delete(self) -> None: ) logging.error(message) raise Exception(f"{message}") + + def to_dict(self) -> Dict: + return { + "id": self.id, + "name": self.name, + "agents": [ + {"assetId": agent.id, "number": idx, "type": "AGENT", "label": "AGENT"} for idx, agent in enumerate(self.agents) + ], + "links": [], + "description": self.description, + "llmId": self.llm_id, 
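+            # the supervisor reuses the team's own LLM; plannerId is only set when the mentalist/inspector is enabled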
+            "supervisorId": self.llm_id,
+            "plannerId": self.llm_id if self.use_mentalist_and_inspector else None,
+            "supplier": self.supplier,
+            "version": self.version,
+        }
+
+    def validate(self) -> None:
+        """Validate the Team."""
+        from aixplain.factories.model_factory import ModelFactory
+
+        # validate name
+        assert (
+            re.match("^[a-zA-Z0-9 ]*$", self.name) is not None
+        ), "Team Agent Creation Error: Team name must not contain special characters."
+
+        try:
+            llm = ModelFactory.get(self.llm_id)
+            assert llm.function == Function.TEXT_GENERATION, "Large Language Model must be a text generation model."
+        except Exception:
+            raise Exception(f"Large Language Model with ID '{self.llm_id}' not found.")
+
+        for agent in self.agents:
+            agent.validate()
+
+    def update(self) -> None:
+        """Update the Team Agent."""
+        from aixplain.factories.team_agent_factory.utils import build_team_agent
+
+        self.validate()
+        url = urljoin(config.BACKEND_URL, f"sdk/agent-communities/{self.id}")
+        headers = {"x-api-key": config.TEAM_API_KEY, "Content-Type": "application/json"}
+
+        payload = self.to_dict()
+
+        logging.debug(f"Start service for PUT Update Team Agent - {url} - {headers} - {json.dumps(payload)}")
+        resp = "No specified error."
+        try:
+            r = _request_with_retry("put", url, headers=headers, json=payload)
+            resp = r.json()
+        except Exception:
+            raise Exception("Team Agent Update Error: Please contact the administrators.")
+
+        if 200 <= r.status_code < 300:
+            return build_team_agent(resp)
+        else:
+            error_msg = f"Team Agent Update Error (HTTP {r.status_code}): {resp}"
+            raise Exception(error_msg)
+
+    def deploy(self) -> None:
+        """Deploy the Team Agent."""
+        assert self.status == AssetStatus.DRAFT, "Team Agent Deployment Error: Team Agent must be in draft status."
+        assert self.status != AssetStatus.ONBOARDED, "Team Agent Deployment Error: Team Agent is already deployed."
+        self.status = AssetStatus.ONBOARDED
+        self.update()
diff --git a/tests/functional/agent/agent_functional_test.py b/tests/functional/agent/agent_functional_test.py
index 478b23f3..5b247728 100644
--- a/tests/functional/agent/agent_functional_test.py
+++ b/tests/functional/agent/agent_functional_test.py
@@ -15,13 +15,16 @@
 See the License for the specific language governing permissions and
 limitations under the License.
""" +import copy import json from dotenv import load_dotenv load_dotenv() from aixplain.factories import AgentFactory, TeamAgentFactory +from aixplain.enums.asset_status import AssetStatus from aixplain.enums.function import Function from aixplain.enums.supplier import Supplier +from uuid import uuid4 import pytest @@ -56,14 +59,15 @@ def test_end2end(run_input_map, delete_agents_and_team_agents): tools = [] if "model_tools" in run_input_map: for tool in run_input_map["model_tools"]: + tool_ = copy.copy(tool) for supplier in Supplier: if tool["supplier"] is not None and tool["supplier"].lower() in [ supplier.value["code"].lower(), supplier.value["name"].lower(), ]: - tool["supplier"] = supplier + tool_["supplier"] = supplier break - tools.append(AgentFactory.create_model_tool(**tool)) + tools.append(AgentFactory.create_model_tool(**tool_)) if "pipeline_tools" in run_input_map: for tool in run_input_map["pipeline_tools"]: tools.append(AgentFactory.create_pipeline_tool(pipeline=tool["pipeline_id"], description=tool["description"])) @@ -72,6 +76,11 @@ def test_end2end(run_input_map, delete_agents_and_team_agents): name=run_input_map["agent_name"], description=run_input_map["agent_name"], llm_id=run_input_map["llm_id"], tools=tools ) assert agent is not None + assert agent.status == AssetStatus.DRAFT + # deploy agent + agent.deploy() + assert agent.status == AssetStatus.ONBOARDED + agent = AgentFactory.get(agent.id) assert agent is not None response = agent.run(data=run_input_map["query"]) @@ -91,6 +100,43 @@ def test_list_agents(): assert type(agents_result) is list +def test_update_draft_agent(run_input_map): + for team in TeamAgentFactory.list()["results"]: + team.delete() + + for agent in AgentFactory.list()["results"]: + agent.delete() + + tools = [] + if "model_tools" in run_input_map: + for tool in run_input_map["model_tools"]: + tool_ = copy.copy(tool) + for supplier in Supplier: + if tool["supplier"] is not None and tool["supplier"].lower() in [ + supplier.value["code"].lower(), + supplier.value["name"].lower(), + ]: + tool_["supplier"] = supplier + break + tools.append(AgentFactory.create_model_tool(**tool_)) + if "pipeline_tools" in run_input_map: + for tool in run_input_map["pipeline_tools"]: + tools.append(AgentFactory.create_pipeline_tool(pipeline=tool["pipeline_id"], description=tool["description"])) + + agent = AgentFactory.create( + name=run_input_map["agent_name"], description=run_input_map["agent_name"], llm_id=run_input_map["llm_id"], tools=tools + ) + + agent_name = str(uuid4()).replace("-", "") + agent.name = agent_name + agent.update() + + agent = AgentFactory.get(agent.id) + assert agent.name == agent_name + assert agent.status == AssetStatus.DRAFT + agent.delete() + + def test_fail_non_existent_llm(): with pytest.raises(Exception) as exc_info: AgentFactory.create( diff --git a/tests/functional/team_agent/team_agent_functional_test.py b/tests/functional/team_agent/team_agent_functional_test.py index c28b01da..44ea5dbc 100644 --- a/tests/functional/team_agent/team_agent_functional_test.py +++ b/tests/functional/team_agent/team_agent_functional_test.py @@ -20,9 +20,11 @@ load_dotenv() from aixplain.factories import AgentFactory, TeamAgentFactory +from aixplain.enums.asset_status import AssetStatus from aixplain.enums.function import Function from aixplain.enums.supplier import Supplier - +from copy import copy +from uuid import uuid4 import pytest RUN_FILE = "tests/functional/team_agent/data/team_agent_test_end2end.json" @@ -59,14 +61,15 @@ def test_end2end(run_input_map, 
delete_agents_and_team_agents): tools = [] if "model_tools" in agent: for tool in agent["model_tools"]: + tool_ = copy(tool) for supplier in Supplier: if tool["supplier"] is not None and tool["supplier"].lower() in [ supplier.value["code"].lower(), supplier.value["name"].lower(), ]: - tool["supplier"] = supplier + tool_["supplier"] = supplier break - tools.append(AgentFactory.create_model_tool(**tool)) + tools.append(AgentFactory.create_model_tool(**tool_)) if "pipeline_tools" in agent: for tool in agent["pipeline_tools"]: tools.append(AgentFactory.create_pipeline_tool(pipeline=tool["pipeline_id"], description=tool["description"])) @@ -74,6 +77,7 @@ def test_end2end(run_input_map, delete_agents_and_team_agents): agent = AgentFactory.create( name=agent["agent_name"], description=agent["agent_name"], llm_id=agent["llm_id"], tools=tools ) + agent.deploy() agents.append(agent) team_agent = TeamAgentFactory.create( @@ -85,6 +89,9 @@ def test_end2end(run_input_map, delete_agents_and_team_agents): ) assert team_agent is not None + assert team_agent.status == AssetStatus.DRAFT + # deploy team agent + team_agent.deploy() team_agent = TeamAgentFactory.get(team_agent.id) assert team_agent is not None response = team_agent.run(data=run_input_map["query"]) @@ -99,6 +106,51 @@ def test_end2end(run_input_map, delete_agents_and_team_agents): team_agent.delete() +def test_draft_team_agent_update(run_input_map): + for team in TeamAgentFactory.list()["results"]: + team.delete() + for agent in AgentFactory.list()["results"]: + agent.delete() + + agents = [] + for agent in run_input_map["agents"]: + tools = [] + if "model_tools" in agent: + for tool in agent["model_tools"]: + tool_ = copy(tool) + for supplier in Supplier: + if tool["supplier"] is not None and tool["supplier"].lower() in [ + supplier.value["code"].lower(), + supplier.value["name"].lower(), + ]: + tool_["supplier"] = supplier + break + tools.append(AgentFactory.create_model_tool(**tool_)) + if "pipeline_tools" in agent: + for tool in agent["pipeline_tools"]: + tools.append(AgentFactory.create_pipeline_tool(pipeline=tool["pipeline_id"], description=tool["description"])) + + agent = AgentFactory.create( + name=agent["agent_name"], description=agent["agent_name"], llm_id=agent["llm_id"], tools=tools + ) + agents.append(agent) + + team_agent = TeamAgentFactory.create( + name=run_input_map["team_agent_name"], + agents=agents, + description=run_input_map["team_agent_name"], + llm_id=run_input_map["llm_id"], + use_mentalist_and_inspector=True, + ) + + team_agent_name = str(uuid4()).replace("-", "") + team_agent.name = team_agent_name + team_agent.update() + team_agent = TeamAgentFactory.get(team_agent.id) + assert team_agent.name == team_agent_name + assert team_agent.status == AssetStatus.DRAFT + + def test_fail_non_existent_llm(): with pytest.raises(Exception) as exc_info: AgentFactory.create( diff --git a/tests/unit/agent_test.py b/tests/unit/agent_test.py index 43d0d0a2..1b4fd929 100644 --- a/tests/unit/agent_test.py +++ b/tests/unit/agent_test.py @@ -1,5 +1,6 @@ import pytest import requests_mock +from aixplain.enums.asset_status import AssetStatus from aixplain.modules import Agent from aixplain.utils import config from aixplain.factories import AgentFactory @@ -53,7 +54,7 @@ def test_fail_key_not_found(): assert str(exc_info.value) == "Key 'input2' not found in query." 
-def test_sucess_query_content(): +def test_success_query_content(): agent = Agent("123", "Test Agent", "Sample Description") with requests_mock.Mocker() as mock: url = agent.url @@ -83,6 +84,12 @@ def test_invalid_modeltool(): assert str(exc_info.value) == "Model Tool Unavailable. Make sure Model '309851793' exists or you have access to it." +def test_invalid_llm_id(): + with pytest.raises(Exception) as exc_info: + AgentFactory.create(name="Test", description="", tools=[], llm_id="123") + assert str(exc_info.value) == "Large Language Model with ID '123' not found." + + def test_invalid_agent_name(): with pytest.raises(Exception) as exc_info: AgentFactory.create(name="[Test]", description="", tools=[], llm_id="6646261c6eb563165658bbb1") @@ -102,7 +109,7 @@ def test_create_agent(): "description": "Test Agent Description", "teamId": "123", "version": "1.0", - "status": "onboarded", + "status": "draft", "llmId": "6646261c6eb563165658bbb1", "pricing": {"currency": "USD", "value": 0.0}, "assets": [ @@ -145,3 +152,77 @@ def test_create_agent(): assert agent.llm_id == ref_response["llmId"] assert agent.tools[0].function.value == ref_response["assets"][0]["function"] assert agent.tools[0].description == ref_response["assets"][0]["description"] + assert agent.status == AssetStatus.DRAFT + + +def test_to_dict(): + agent = Agent( + id="", + name="Test Agent", + description="Test Agent Description", + llm_id="6646261c6eb563165658bbb1", + tools=[AgentFactory.create_model_tool(function="text-generation")], + ) + + agent_json = agent.to_dict() + assert agent_json["id"] == "" + assert agent_json["name"] == "Test Agent" + assert agent_json["description"] == "Test Agent Description" + assert agent_json["llmId"] == "6646261c6eb563165658bbb1" + assert agent_json["assets"][0]["function"] == "text-generation" + assert agent_json["assets"][0]["type"] == "model" + + +def test_update_success(): + agent = Agent( + id="123", + name="Test Agent", + description="Test Agent Description", + llm_id="6646261c6eb563165658bbb1", + tools=[AgentFactory.create_model_tool(function="text-generation")], + ) + + with requests_mock.Mocker() as mock: + url = urljoin(config.BACKEND_URL, f"sdk/agents/{agent.id}") + headers = {"x-api-key": config.TEAM_API_KEY, "Content-Type": "application/json"} + ref_response = { + "id": "123", + "name": "Test Agent", + "description": "Test Agent Description", + "teamId": "123", + "version": "1.0", + "status": "onboarded", + "llmId": "6646261c6eb563165658bbb1", + "pricing": {"currency": "USD", "value": 0.0}, + "assets": [ + { + "type": "model", + "supplier": "openai", + "version": "1.0", + "assetId": "6646261c6eb563165658bbb1", + "function": "text-generation", + } + ], + } + mock.put(url, headers=headers, json=ref_response) + + url = urljoin(config.BACKEND_URL, "sdk/models/6646261c6eb563165658bbb1") + model_ref_response = { + "id": "6646261c6eb563165658bbb1", + "name": "Test LLM", + "description": "Test LLM Description", + "function": {"id": "text-generation"}, + "supplier": "openai", + "version": {"id": "1.0"}, + "status": "onboarded", + "pricing": {"currency": "USD", "value": 0.0}, + } + mock.get(url, headers=headers, json=model_ref_response) + + agent.update() + + assert agent.id == ref_response["id"] + assert agent.name == ref_response["name"] + assert agent.description == ref_response["description"] + assert agent.llm_id == ref_response["llmId"] + assert agent.tools[0].function.value == ref_response["assets"][0]["function"] diff --git a/tests/unit/team_agent_test.py 
b/tests/unit/team_agent_test.py index fd738c04..56564b73 100644 --- a/tests/unit/team_agent_test.py +++ b/tests/unit/team_agent_test.py @@ -1,8 +1,12 @@ import pytest import requests_mock -from aixplain.modules import TeamAgent +from aixplain.enums.asset_status import AssetStatus +from aixplain.modules import Agent, TeamAgent +from aixplain.modules.agent import ModelTool from aixplain.factories import TeamAgentFactory +from aixplain.factories import AgentFactory from aixplain.utils import config +from urllib.parse import urljoin def test_fail_no_data_query(): @@ -71,3 +75,122 @@ def test_fail_number_agents(): TeamAgentFactory.create(name="Test Team Agent", agents=[]) assert str(exc_info.value) == "TeamAgent Onboarding Error: At least one agent must be provided." + + +def test_to_dict(): + team_agent = TeamAgent( + id="123", + name="Test Team Agent", + agents=[ + Agent( + id="", + name="Test Agent", + description="Test Agent Description", + llm_id="6646261c6eb563165658bbb1", + tools=[ModelTool(function="text-generation")], + ) + ], + description="Test Team Agent Description", + llm_id="6646261c6eb563165658bbb1", + use_mentalist_and_inspector=False, + ) + + team_agent_dict = team_agent.to_dict() + assert team_agent_dict["id"] == "123" + assert team_agent_dict["name"] == "Test Team Agent" + assert team_agent_dict["description"] == "Test Team Agent Description" + assert team_agent_dict["llmId"] == "6646261c6eb563165658bbb1" + assert team_agent_dict["supervisorId"] == "6646261c6eb563165658bbb1" + assert team_agent_dict["plannerId"] is None + assert len(team_agent_dict["agents"]) == 1 + assert team_agent_dict["agents"][0]["assetId"] == "" + assert team_agent_dict["agents"][0]["number"] == 0 + assert team_agent_dict["agents"][0]["type"] == "AGENT" + assert team_agent_dict["agents"][0]["label"] == "AGENT" + + +def test_create_team_agent(): + with requests_mock.Mocker() as mock: + headers = {"x-api-key": config.TEAM_API_KEY, "Content-Type": "application/json"} + # MOCK GET LLM + url = urljoin(config.BACKEND_URL, "sdk/models/6646261c6eb563165658bbb1") + model_ref_response = { + "id": "6646261c6eb563165658bbb1", + "name": "Test LLM", + "description": "Test LLM Description", + "function": {"id": "text-generation"}, + "supplier": "openai", + "version": {"id": "1.0"}, + "status": "onboarded", + "pricing": {"currency": "USD", "value": 0.0}, + } + mock.get(url, headers=headers, json=model_ref_response) + + # AGENT MOCK CREATION + url = urljoin(config.BACKEND_URL, "sdk/agents") + ref_response = { + "id": "123", + "name": "Test Agent", + "description": "Test Agent Description", + "teamId": "123", + "version": "1.0", + "status": "draft", + "llmId": "6646261c6eb563165658bbb1", + "pricing": {"currency": "USD", "value": 0.0}, + "assets": [ + { + "type": "model", + "supplier": "openai", + "version": "1.0", + "assetId": "6646261c6eb563165658bbb1", + "function": "text-generation", + } + ], + } + mock.post(url, headers=headers, json=ref_response) + + agent = AgentFactory.create( + name="Test Agent", + description="Test Agent Description", + llm_id="6646261c6eb563165658bbb1", + tools=[ModelTool(model="6646261c6eb563165658bbb1")], + ) + + # AGENT MOCK GET + url = urljoin(config.BACKEND_URL, f"sdk/agents/{agent.id}") + mock.get(url, headers=headers, json=ref_response) + + # TEAM MOCK CREATION + url = urljoin(config.BACKEND_URL, "sdk/agent-communities") + team_ref_response = { + "id": "team_agent_123", + "name": "TEST Multi agent", + "status": "draft", + "teamId": 645, + "description": "TEST Multi agent", + "llmId": 
"6646261c6eb563165658bbb1", + "assets": [], + "agents": [{"assetId": "123", "type": "AGENT", "number": 0, "label": "AGENT"}], + "links": [], + "plannerId": "6646261c6eb563165658bbb1", + "supervisorId": "6646261c6eb563165658bbb1", + "createdAt": "2024-10-28T19:30:25.344Z", + "updatedAt": "2024-10-28T19:30:25.344Z", + } + mock.post(url, headers=headers, json=team_ref_response) + + team_agent = TeamAgentFactory.create( + name="TEST Multi agent", + description="TEST Multi agent", + use_mentalist_and_inspector=True, + llm_id="6646261c6eb563165658bbb1", + agents=[agent], + ) + assert team_agent.id is not None + assert team_agent.name == team_ref_response["name"] + assert team_agent.description == team_ref_response["description"] + assert team_agent.llm_id == team_ref_response["llmId"] + assert team_agent.use_mentalist_and_inspector is True + assert team_agent.status == AssetStatus.DRAFT + assert len(team_agent.agents) == 1 + assert team_agent.agents[0].id == team_ref_response["agents"][0]["assetId"] From 834068774657b347271ca1fd738a30165c592ef3 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Mon, 4 Nov 2024 11:51:51 -0300 Subject: [PATCH 065/105] Service mode parameter in the SDK (#295) --- aixplain/modules/pipeline/asset.py | 18 ++++++++++++++++-- tests/functional/pipelines/run_test.py | 15 +++++++++------ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/aixplain/modules/pipeline/asset.py b/aixplain/modules/pipeline/asset.py index 0e9ed56e..f9e29235 100644 --- a/aixplain/modules/pipeline/asset.py +++ b/aixplain/modules/pipeline/asset.py @@ -139,6 +139,11 @@ def poll(self, poll_url: Text, name: Text = "pipeline_process") -> Dict: r = _request_with_retry("get", poll_url, headers=headers) try: resp = r.json() + if isinstance(resp["data"], str): + try: + resp["data"] = json.loads(resp["data"])["response"] + except Exception: + resp = r.json() logging.info(f"Single Poll for Pipeline: Status of polling for {name} : {resp}") except Exception: resp = {"status": "FAILED"} @@ -151,6 +156,7 @@ def run( name: Text = "pipeline_process", timeout: float = 20000.0, wait_time: float = 1.0, + batch_mode: bool = True, **kwargs, ) -> Dict: """Runs a pipeline call. @@ -161,6 +167,7 @@ def run( name (Text, optional): ID given to a call. Defaults to "pipeline_process". timeout (float, optional): total polling time. Defaults to 20000.0. wait_time (float, optional): wait time in seconds between polling calls. Defaults to 1.0. + batch_mode (bool, optional): Whether to run the pipeline in batch mode or online. Defaults to True. kwargs: A dictionary of keyword arguments. The keys are the argument names Returns: @@ -168,7 +175,7 @@ def run( """ start = time.time() try: - response = self.run_async(data, data_asset=data_asset, name=name, **kwargs) + response = self.run_async(data, data_asset=data_asset, name=name, batch_mode=batch_mode, **kwargs) if response["status"] == "FAILED": end = time.time() response["elapsed_time"] = end - start @@ -297,7 +304,12 @@ def __prepare_payload( return payload def run_async( - self, data: Union[Text, Dict], data_asset: Optional[Union[Text, Dict]] = None, name: Text = "pipeline_process", **kwargs + self, + data: Union[Text, Dict], + data_asset: Optional[Union[Text, Dict]] = None, + name: Text = "pipeline_process", + batch_mode: bool = True, + **kwargs, ) -> Dict: """Runs asynchronously a pipeline call. 
@@ -305,6 +317,7 @@ def run_async( data (Union[Text, Dict]): link to the input data data_asset (Optional[Union[Text, Dict]], optional): Data asset to be processed by the pipeline. Defaults to None. name (Text, optional): ID given to a call. Defaults to "pipeline_process". + batch_mode (bool, optional): Whether to run the pipeline in batch mode or online. Defaults to True. kwargs: A dictionary of keyword arguments. The keys are the argument names Returns: @@ -316,6 +329,7 @@ def run_async( } payload = self.__prepare_payload(data=data, data_asset=data_asset) + payload["batchmode"] = batch_mode payload.update(kwargs) payload = json.dumps(payload) call_url = f"{self.url}/{self.id}" diff --git a/tests/functional/pipelines/run_test.py b/tests/functional/pipelines/run_test.py index 25fadaf4..dbdb76fa 100644 --- a/tests/functional/pipelines/run_test.py +++ b/tests/functional/pipelines/run_test.py @@ -51,7 +51,7 @@ def test_get_pipeline(): def test_run_single_str(batchmode: bool, version: str): pipeline = PipelineFactory.list(query="SingleNodePipeline")["results"][0] - response = pipeline.run(data="Translate this thing", **{"batchmode": batchmode, "version": version}) + response = pipeline.run(data="Translate this thing", batch_mode=batchmode, **{"version": version}) assert response["status"] == "SUCCESS" @@ -71,7 +71,7 @@ def test_run_single_local_file(batchmode: bool, version: str): with open(fname, "w") as f: f.write("Translate this thing") - response = pipeline.run(data=fname, **{"batchmode": batchmode, "version": version}) + response = pipeline.run(data=fname, batch_mode=batchmode, **{"version": version}) os.remove(fname) assert response["status"] == "SUCCESS" @@ -90,7 +90,8 @@ def test_run_with_url(batchmode: bool, version: str): response = pipeline.run( data="https://aixplain-platform-assets.s3.amazonaws.com/data/dev/64c81163f8bdcac7443c2dad/data/f8.txt", - **{"batchmode": batchmode, "version": version} + batch_mode=batchmode, + **{"version": version} ) assert response["status"] == "SUCCESS" @@ -110,7 +111,7 @@ def test_run_with_dataset(batchmode: bool, version: str): data_id = dataset.source_data["en"].id pipeline = PipelineFactory.list(query="SingleNodePipeline")["results"][0] - response = pipeline.run(data=data_id, data_asset=data_asset_id, **{"batchmode": batchmode, "version": version}) + response = pipeline.run(data=data_id, data_asset=data_asset_id, batch_mode=batchmode, **{"version": version}) assert response["status"] == "SUCCESS" @@ -128,7 +129,8 @@ def test_run_multipipe_with_strings(batchmode: bool, version: str): response = pipeline.run( data={"Input": "Translate this thing.", "Reference": "Traduza esta coisa."}, - **{"batchmode": batchmode, "version": version} + batch_mode=batchmode, + **{"version": version} ) assert response["status"] == "SUCCESS" @@ -154,7 +156,8 @@ def test_run_multipipe_with_datasets(batchmode: bool, version: str): response = pipeline.run( data={"Input": input_id, "Reference": reference_id}, data_asset={"Input": data_asset_id, "Reference": data_asset_id}, - **{"batchmode": batchmode, "version": version} + batch_mode=batchmode, + **{"version": version} ) assert response["status"] == "SUCCESS" From f1bd8f3bad8598078ee9845e80b9750b5c2da727 Mon Sep 17 00:00:00 2001 From: kadirpekel Date: Fri, 8 Nov 2024 00:20:44 +0300 Subject: [PATCH 066/105] BUG-233 Prompt variables are now populated and validated automatically (#303) * BUG-233 Prompt variables are now populated and validated automatically * BUG-233 matching unit tests --- 
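
A quick sketch of the behavior this patch introduces (illustrative values only): prompt placeholders written as {{name}} are extracted by the new find_prompt_params helper and each match is registered as a required text input on the node.

    from aixplain.modules.pipeline.designer.utils import find_prompt_params

    # placeholder names are returned in order of appearance
    assert find_prompt_params("Translate {{text}} into {{language}}") == ["text", "language"]
    assert find_prompt_params("no placeholders here") == []
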
aixplain/factories/pipeline_factory/utils.py | 2 +- aixplain/modules/pipeline/designer/base.py | 24 ++- .../modules/pipeline/designer/pipeline.py | 21 +- aixplain/modules/pipeline/designer/utils.py | 13 ++ tests/unit/designer_unit_test.py | 186 +++++++++++++++--- 5 files changed, 213 insertions(+), 33 deletions(-) create mode 100644 aixplain/modules/pipeline/designer/utils.py diff --git a/aixplain/factories/pipeline_factory/utils.py b/aixplain/factories/pipeline_factory/utils.py index c9291031..08954571 100644 --- a/aixplain/factories/pipeline_factory/utils.py +++ b/aixplain/factories/pipeline_factory/utils.py @@ -88,7 +88,7 @@ def build_from_response(response: Dict, load_architecture: bool = False) -> Pipe data_type=custom_input.get("dataType"), code=custom_input["code"], value=custom_input.get("value"), - is_required=custom_input.get("isRequired", False), + is_required=custom_input.get("isRequired", True), ) node.number = node_json["number"] node.label = node_json["label"] diff --git a/aixplain/modules/pipeline/designer/base.py b/aixplain/modules/pipeline/designer/base.py index a925840f..08d4c8c5 100644 --- a/aixplain/modules/pipeline/designer/base.py +++ b/aixplain/modules/pipeline/designer/base.py @@ -1,3 +1,4 @@ +import re from typing import ( List, Union, @@ -11,7 +12,7 @@ from aixplain.enums import DataType from .enums import NodeType, ParamType - +from .utils import find_prompt_params if TYPE_CHECKING: from .pipeline import DesignerPipeline @@ -280,14 +281,31 @@ def __getitem__(self, code: str) -> Param: return param raise KeyError(f"Parameter with code '{code}' not found.") + def special_prompt_handling(self, code: str, value: str) -> None: + """ + This method will handle the special prompt handling for asset nodes + having `text-generation` function type. 
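+
+        Any `{{variable}}` placeholder found in the assigned prompt value is
+        registered as a new required text input parameter on the node.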
+ """ + from .nodes import AssetNode + + if isinstance(self.node, AssetNode) and self.node.asset.function == "text-generation": + if code == "prompt": + matches = find_prompt_params(value) + for match in matches: + self.node.inputs.create_param(match, DataType.TEXT, is_required=True) + + def set_param_value(self, code: str, value: str) -> None: + self.special_prompt_handling(code, value) + self[code].value = value + def __setitem__(self, code: str, value: str) -> None: # set param value on set item to avoid setting it manually - self[code].value = value + self.set_param_value(code, value) def __setattr__(self, name: str, value: any) -> None: # set param value on attribute assignment to avoid setting it manually if isinstance(value, str) and hasattr(self, name): - self[name].value = value + self.set_param_value(name, value) else: super().__setattr__(name, value) diff --git a/aixplain/modules/pipeline/designer/pipeline.py b/aixplain/modules/pipeline/designer/pipeline.py index ece5ac0c..79013590 100644 --- a/aixplain/modules/pipeline/designer/pipeline.py +++ b/aixplain/modules/pipeline/designer/pipeline.py @@ -6,7 +6,7 @@ from .nodes import AssetNode, Decision, Script, Input, Output, Router, Route, BareReconstructor, BareSegmentor, BareMetric from .enums import NodeType, RouteType, Operation from .mixins import OutputableMixin - +from .utils import find_prompt_params T = TypeVar("T", bound="AssetNode") @@ -125,6 +125,24 @@ def is_param_set(self, node, param): """ return param.value or self.is_param_linked(node, param) + def special_prompt_validation(self, node: Node): + """ + This method will handle the special rule for asset nodes having + `text-generation` function type where if any prompt variable exists + then the `text` param is not required but the prompt param are. + + :param node: the node + :raises ValueError: if the pipeline is not valid + """ + if isinstance(node, AssetNode) and node.asset.function == "text-generation": + if self.is_param_set(node, node.inputs.prompt): + matches = find_prompt_params(node.inputs.prompt.value) + if matches: + node.inputs.text.is_required = False + for match in matches: + if match not in node.inputs: + raise ValueError(f"Param {match} of node {node.label} should be defined and set") + def validate_params(self): """ This method will check if all required params are either set or linked @@ -132,6 +150,7 @@ def validate_params(self): :raises ValueError: if the pipeline is not valid """ for node in self.nodes: + self.special_prompt_validation(node) for param in node.inputs: if param.is_required and not self.is_param_set(node, param): raise ValueError(f"Param {param.code} of node {node.label} is required") diff --git a/aixplain/modules/pipeline/designer/utils.py b/aixplain/modules/pipeline/designer/utils.py new file mode 100644 index 00000000..250d5501 --- /dev/null +++ b/aixplain/modules/pipeline/designer/utils.py @@ -0,0 +1,13 @@ +import re +from typing import List + + +def find_prompt_params(prompt: str) -> List[str]: + """ + This method will find the prompt parameters in the prompt string. 
+ + :param prompt: the prompt string + :return: list of prompt parameters + """ + param_regex = re.compile(r"\{\{([^\}]+)\}\}") + return param_regex.findall(prompt) diff --git a/tests/unit/designer_unit_test.py b/tests/unit/designer_unit_test.py index 824fd162..57276a20 100644 --- a/tests/unit/designer_unit_test.py +++ b/tests/unit/designer_unit_test.py @@ -1,6 +1,5 @@ import pytest -import unittest.mock as mock - +from unittest.mock import patch, Mock, call from aixplain.enums import DataType from aixplain.modules.pipeline.designer.base import ( @@ -21,7 +20,7 @@ from aixplain.modules.pipeline.designer.mixins import LinkableMixin from aixplain.modules.pipeline.designer.pipeline import DesignerPipeline - +from aixplain.modules.pipeline.designer.base import find_prompt_params def test_create_node(): @@ -30,7 +29,7 @@ def test_create_node(): class BareNode(Node): pass - with mock.patch("aixplain.modules.pipeline.designer.Node.attach_to") as mock_attach_to: + with patch("aixplain.modules.pipeline.designer.Node.attach_to") as mock_attach_to: node = BareNode(number=3, label="FOO") mock_attach_to.assert_not_called() assert isinstance(node.inputs, Inputs) @@ -48,7 +47,7 @@ class FooNode(Node[FooNodeInputs, FooNodeOutputs]): inputs_class = FooNodeInputs outputs_class = FooNodeOutputs - with mock.patch("aixplain.modules.pipeline.designer.Node.attach_to") as mock_attach_to: + with patch("aixplain.modules.pipeline.designer.Node.attach_to") as mock_attach_to: node = FooNode(pipeline=pipeline, number=3, label="FOO") mock_attach_to.assert_called_once_with(pipeline) assert isinstance(node.inputs, FooNodeInputs) @@ -115,8 +114,8 @@ class AssetNode(Node): node = AssetNode() - with mock.patch.object(node.inputs, "serialize") as mock_inputs_serialize: - with mock.patch.object(node.outputs, "serialize") as mock_outputs_serialize: + with patch.object(node.inputs, "serialize") as mock_inputs_serialize: + with patch.object(node.outputs, "serialize") as mock_outputs_serialize: assert node.serialize() == { "number": node.number, "type": NodeType.ASSET, @@ -145,7 +144,7 @@ def test_create_param(): class TypedParam(Param): param_type = ParamType.INPUT - with mock.patch("aixplain.modules.pipeline.designer.Param.attach_to") as mock_attach_to: + with patch("aixplain.modules.pipeline.designer.Param.attach_to") as mock_attach_to: param = TypedParam( code="param", data_type=DataType.TEXT, @@ -158,7 +157,7 @@ class TypedParam(Param): assert param.value == "foo" assert param.param_type == ParamType.INPUT - with mock.patch("aixplain.modules.pipeline.designer.Param.attach_to") as mock_attach_to: + with patch("aixplain.modules.pipeline.designer.Param.attach_to") as mock_attach_to: param = TypedParam( code="param", data_type=DataType.TEXT, @@ -175,7 +174,7 @@ class TypedParam(Param): class UnTypedParam(Param): pass - with mock.patch("aixplain.modules.pipeline.designer.Param.attach_to") as mock_attach_to: + with patch("aixplain.modules.pipeline.designer.Param.attach_to") as mock_attach_to: param = UnTypedParam( code="param", data_type=DataType.TEXT, @@ -186,7 +185,7 @@ class UnTypedParam(Param): assert param.param_type == ParamType.OUTPUT - with mock.patch("aixplain.modules.pipeline.designer.Param.attach_to") as mock_attach_to: + with patch("aixplain.modules.pipeline.designer.Param.attach_to") as mock_attach_to: param = UnTypedParam( code="param", data_type=DataType.TEXT, @@ -202,7 +201,7 @@ class AssetNode(Node): node = AssetNode() - with mock.patch("aixplain.modules.pipeline.designer.Param.attach_to") as mock_attach_to: + with 
patch("aixplain.modules.pipeline.designer.Param.attach_to") as mock_attach_to: param = UnTypedParam( code="param", data_type=DataType.TEXT, @@ -226,7 +225,7 @@ class AssetNode(Node): node = AssetNode() - with mock.patch("aixplain.modules.pipeline.designer.Param.attach_to") as mock_attach_to: + with patch("aixplain.modules.pipeline.designer.Param.attach_to") as mock_attach_to: param = param_cls(code="param", data_type=DataType.TEXT, value="foo", node=node) mock_attach_to.assert_called_once_with(node) assert param.code == "param" @@ -253,7 +252,7 @@ class NoTypeParam(Param): input = InputParam(code="input", data_type=DataType.TEXT, value="foo") - with mock.patch.object(node.inputs, "add_param") as mock_add_param: + with patch.object(node.inputs, "add_param") as mock_add_param: input.attach_to(node) mock_add_param.assert_called_once_with(input) assert input.node is node @@ -265,7 +264,7 @@ class NoTypeParam(Param): output = OutputParam(code="output", data_type=DataType.TEXT, value="bar") - with mock.patch.object(node.outputs, "add_param") as mock_add_param: + with patch.object(node.outputs, "add_param") as mock_add_param: output.attach_to(node) mock_add_param.assert_called_once_with(output) assert output.node is node @@ -304,7 +303,7 @@ class AssetNode(Node, LinkableMixin): output = OutputParam(code="output", data_type=DataType.TEXT, value="bar", node=a) input = InputParam(code="input", data_type=DataType.TEXT, value="foo", node=b) - with mock.patch.object(input, "back_link") as mock_back_link: + with patch.object(input, "back_link") as mock_back_link: output.link(input) mock_back_link.assert_called_once_with(output) @@ -342,7 +341,7 @@ class AssetNode(Node, LinkableMixin): output = OutputParam(code="output", data_type=DataType.TEXT, value="bar", node=a) input = InputParam(code="input", data_type=DataType.TEXT, value="foo", node=b) - with mock.patch.object(a, "link") as mock_link: + with patch.object(a, "link") as mock_link: input.back_link(output) mock_link.assert_called_once_with(b, output, input) @@ -400,7 +399,7 @@ class AssetNode(Node, LinkableMixin): pipeline = DesignerPipeline() - with mock.patch("aixplain.modules.pipeline.designer.Link.attach_to") as mock_attach_to: + with patch("aixplain.modules.pipeline.designer.Link.attach_to") as mock_attach_to: link = Link( from_node=a, to_node=b, @@ -431,8 +430,8 @@ class AssetNode(Node, LinkableMixin): to_param="input", ) - with mock.patch.object(a, "attach_to") as mock_a_attach_to: - with mock.patch.object(b, "attach_to") as mock_b_attach_to: + with patch.object(a, "attach_to") as mock_a_attach_to: + with patch.object(b, "attach_to") as mock_b_attach_to: link.attach_to(pipeline) mock_a_attach_to.assert_called_once_with(pipeline) mock_b_attach_to.assert_called_once_with(pipeline) @@ -451,8 +450,8 @@ class AssetNode(Node, LinkableMixin): to_param="input", ) - with mock.patch.object(a, "attach_to") as mock_a_attach_to: - with mock.patch.object(b, "attach_to") as mock_b_attach_to: + with patch.object(a, "attach_to") as mock_a_attach_to: + with patch.object(b, "attach_to") as mock_b_attach_to: link.attach_to(pipeline) mock_a_attach_to.assert_not_called() mock_b_attach_to.assert_not_called() @@ -555,8 +554,8 @@ class AssetNode(Node): param_proxy = ParamProxy(node) - with mock.patch.object(param_proxy, "_create_param") as mock_create_param: - with mock.patch.object(param_proxy, "add_param") as mock_add_param: + with patch.object(param_proxy, "_create_param") as mock_create_param: + with patch.object(param_proxy, "add_param") as mock_add_param: param 
= param_proxy.create_param("foo", DataType.TEXT, "bar", is_required=True) mock_create_param.assert_called_once_with("foo", DataType.TEXT, "bar") mock_add_param.assert_called_once_with(param) @@ -588,6 +587,48 @@ class FooParam(Param): assert "'bar'" in str(excinfo.value) +def test_param_proxy_set_param_value(): + prompt_param = Mock(spec=Param, code="prompt") + param_proxy = ParamProxy(Mock()) + param_proxy._params = [prompt_param] + with patch.object(param_proxy, "special_prompt_handling") as mock_special_prompt_handling: + param_proxy.set_param_value("prompt", "hello {{foo}}") + mock_special_prompt_handling.assert_called_once_with("prompt", "hello {{foo}}") + assert prompt_param.value == "hello {{foo}}" + + +def test_param_proxy_special_prompt_handling(): + from aixplain.modules.pipeline.designer.nodes import AssetNode + + asset_node = Mock(spec=AssetNode, asset=Mock(function="text-generation")) + param_proxy = ParamProxy(asset_node) + with patch( + "aixplain.modules.pipeline.designer.base.find_prompt_params" + ) as mock_find_prompt_params: + mock_find_prompt_params.return_value = [] + param_proxy.special_prompt_handling("prompt", "hello {{foo}}") + mock_find_prompt_params.assert_called_once_with("hello {{foo}}") + asset_node.inputs.create_param.assert_not_called() + asset_node.reset_mock() + mock_find_prompt_params.reset_mock() + + mock_find_prompt_params.return_value = ["foo"] + param_proxy.special_prompt_handling("prompt", "hello {{foo}}") + mock_find_prompt_params.assert_called_once_with("hello {{foo}}") + asset_node.inputs.create_param.assert_called_once_with("foo", DataType.TEXT, is_required=True) + asset_node.reset_mock() + mock_find_prompt_params.reset_mock() + + mock_find_prompt_params.return_value = ["foo", "bar"] + param_proxy.special_prompt_handling("prompt", "hello {{foo}} {{bar}}") + mock_find_prompt_params.assert_called_once_with("hello {{foo}} {{bar}}") + assert asset_node.inputs.create_param.call_count == 2 + assert asset_node.inputs.create_param.call_args_list == [ + call("foo", DataType.TEXT, is_required=True), + call("bar", DataType.TEXT, is_required=True), + ] + + def test_node_link(): class AssetNode(Node, LinkableMixin): type: NodeType = NodeType.ASSET @@ -623,7 +664,7 @@ class AssetNode(Node): type: NodeType = NodeType.ASSET node1 = AssetNode() - with mock.patch.object(node1, "attach_to") as mock_attach_to: + with patch.object(node1, "attach_to") as mock_attach_to: pipeline.add_node(node1) mock_attach_to.assert_called_once_with(pipeline) @@ -636,14 +677,14 @@ class InputNode(Node): node = InputNode() - with mock.patch.object(pipeline, "add_node") as mock_add_node: + with patch.object(pipeline, "add_node") as mock_add_node: pipeline.add_nodes(node) assert mock_add_node.call_count == 1 node1 = InputNode() node2 = InputNode() - with mock.patch.object(pipeline, "add_node") as mock_add_node: + with patch.object(pipeline, "add_node") as mock_add_node: pipeline.add_nodes(node1, node2) assert mock_add_node.call_count == 2 @@ -662,6 +703,95 @@ class AssetNode(Node): link = Link(from_node=a, to_node=b, from_param="output", to_param="input") pipeline.add_link(link) - with mock.patch.object(link, "attach_to") as mock_attach_to: + with patch.object(link, "attach_to") as mock_attach_to: pipeline.add_link(link) mock_attach_to.assert_called_once_with(pipeline) + + +def test_pipeline_special_prompt_validation(): + from aixplain.modules.pipeline.designer.nodes import AssetNode + + pipeline = DesignerPipeline() + asset_node = Mock( + spec=AssetNode, + label="LLM(ID=1)", + 
asset=Mock(function="text-generation"), + inputs=Mock(prompt=Mock(value="hello {{foo}}"), text=Mock(is_required=True)), + ) + with patch.object(pipeline, "is_param_set") as mock_is_param_set: + mock_is_param_set.return_value = False + pipeline.special_prompt_validation(asset_node) + mock_is_param_set.assert_called_once_with(asset_node, asset_node.inputs.prompt) + assert asset_node.inputs.text.is_required is True + mock_is_param_set.reset_mock() + mock_is_param_set.return_value = True + with patch( + "aixplain.modules.pipeline.designer.pipeline.find_prompt_params" + ) as mock_find_prompt_params: + mock_find_prompt_params.return_value = [] + pipeline.special_prompt_validation(asset_node) + mock_is_param_set.assert_called_once_with( + asset_node, asset_node.inputs.prompt + ) + mock_find_prompt_params.assert_called_once_with( + asset_node.inputs.prompt.value + ) + assert asset_node.inputs.text.is_required is True + + mock_is_param_set.reset_mock() + mock_is_param_set.return_value = True + mock_find_prompt_params.reset_mock() + mock_find_prompt_params.return_value = ["foo"] + asset_node.inputs.__contains__ = Mock(return_value=False) + + with pytest.raises( + ValueError, + match="Param foo of node LLM\\(ID=1\\) should be defined and set", + ): + pipeline.special_prompt_validation(asset_node) + + mock_is_param_set.assert_called_once_with( + asset_node, asset_node.inputs.prompt + ) + mock_find_prompt_params.assert_called_once_with( + asset_node.inputs.prompt.value + ) + assert asset_node.inputs.text.is_required is False + + mock_is_param_set.reset_mock() + mock_is_param_set.return_value = True + mock_find_prompt_params.reset_mock() + mock_find_prompt_params.return_value = ["foo"] + asset_node.inputs.text.is_required = True + + asset_node.inputs.__contains__ = Mock(return_value=True) + pipeline.special_prompt_validation(asset_node) + mock_is_param_set.assert_called_once_with( + asset_node, asset_node.inputs.prompt + ) + mock_find_prompt_params.assert_called_once_with( + asset_node.inputs.prompt.value + ) + assert asset_node.inputs.text.is_required is False + + +@pytest.mark.parametrize( + "input, expected", + [ + ("hello {{foo}}", ["foo"]), + ("hello {{foo}} {{bar}}", ["foo", "bar"]), + ("hello {{foo}} {{bar}} {{baz}}", ["foo", "bar", "baz"]), + # no match cases + ("hello bar", []), + ("hello {{foo]] bar", []), + ("hello {foo} bar", []), + # edge cases + ("", []), + ("{{}}", []), + # interesting cases + ("hello {{foo {{bar}} baz}} {{bar}} {{baz}}", ["foo {{bar", "bar", "baz"]), + ], +) +def test_find_prompt_params(input, expected): + print(input, expected) + assert find_prompt_params(input) == expected From e2d1be19c9e2b6f170c5c9f2a7b90b6566499720 Mon Sep 17 00:00:00 2001 From: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Date: Fri, 8 Nov 2024 19:31:12 -0300 Subject: [PATCH 067/105] Fix check_storage_type to not detect folder as a file (#302) --- aixplain/factories/file_factory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aixplain/factories/file_factory.py b/aixplain/factories/file_factory.py index 2085c75d..1a29ac11 100644 --- a/aixplain/factories/file_factory.py +++ b/aixplain/factories/file_factory.py @@ -91,7 +91,7 @@ def check_storage_type(cls, input_link: Any) -> StorageType: Returns: StorageType: URL, TEXT or FILE """ - if os.path.exists(input_link) is True: + if os.path.exists(input_link) is True and os.path.isfile(input_link) is True: return StorageType.FILE elif ( input_link.startswith("s3://") From 3754cc4c01d77886f608a2cfb0311b349a233c81 Mon 
Sep 17 00:00:00 2001 From: kadirpekel Date: Mon, 11 Nov 2024 21:59:09 +0300 Subject: [PATCH 068/105] ENG-979 New functional test for script nodes (#305) * ENG-979 New functional test for script nodes * Script test only on pipeline 3.0 --------- Co-authored-by: Thiago Castro Ferreira --- tests/functional/pipelines/run_test.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/functional/pipelines/run_test.py b/tests/functional/pipelines/run_test.py index dbdb76fa..6ca9e6fe 100644 --- a/tests/functional/pipelines/run_test.py +++ b/tests/functional/pipelines/run_test.py @@ -241,3 +241,13 @@ def test_run_decision(input_data: str, output_data: str, version: str): assert response["status"] == "SUCCESS" assert response["data"][0]["label"] == output_data + + +@pytest.mark.parametrize("version", ["3.0"]) +def test_run_script(version: str): + pipeline = PipelineFactory.list(query="Script Functional Test - DO NOT DELETE")["results"][0] + response = pipeline.run("https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", **{"version": version}) + + assert response["status"] == "SUCCESS" + data = response["data"][0]["segments"][0]["response"] + assert data.startswith("SCRIPT MODIFIED:") From 635325496dbad0e2dbb201dcca7b8f08c3cade0e Mon Sep 17 00:00:00 2001 From: Zaina Abu Shaban Date: Mon, 11 Nov 2024 23:21:46 +0300 Subject: [PATCH 069/105] Bug 228 sdk always return model response when running model (#304) * Return sync poll as model response, removed model status and switched to asset status, all model unit tests working * response status * AssetStatus to ResponseStatus --------- Co-authored-by: Thiago Castro Ferreira --- aixplain/enums/__init__.py | 2 +- aixplain/enums/model_status.py | 11 ---------- aixplain/enums/response_status.py | 31 ++++++++++++++++++++++++++ aixplain/modules/model/__init__.py | 34 ++++++++++++++++++----------- aixplain/modules/model/llm_model.py | 6 ++--- aixplain/modules/model/response.py | 6 ++--- tests/unit/llm_test.py | 4 ++-- tests/unit/model_test.py | 12 +++++----- 8 files changed, 66 insertions(+), 40 deletions(-) delete mode 100644 aixplain/enums/model_status.py create mode 100644 aixplain/enums/response_status.py diff --git a/aixplain/enums/__init__.py b/aixplain/enums/__init__.py index d66facce..947d59a9 100644 --- a/aixplain/enums/__init__.py +++ b/aixplain/enums/__init__.py @@ -13,4 +13,4 @@ from .supplier import Supplier from .sort_by import SortBy from .sort_order import SortOrder -from .model_status import ModelStatus +from .response_status import ResponseStatus \ No newline at end of file diff --git a/aixplain/enums/model_status.py b/aixplain/enums/model_status.py deleted file mode 100644 index af4ae0a9..00000000 --- a/aixplain/enums/model_status.py +++ /dev/null @@ -1,11 +0,0 @@ -from enum import Enum -from typing import Text - - -class ModelStatus(Text, Enum): - FAILED = "FAILED" - IN_PROGRESS = "IN_PROGRESS" - SUCCESS = "SUCCESS" - - def __str__(self): - return self._value_ diff --git a/aixplain/enums/response_status.py b/aixplain/enums/response_status.py new file mode 100644 index 00000000..d2810753 --- /dev/null +++ b/aixplain/enums/response_status.py @@ -0,0 +1,31 @@ +__author__ = "thiagocastroferreira" + +""" +Copyright 2024 The aiXplain SDK authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Author: Duraikrishna Selvaraju, Thiago Castro Ferreira, Shreyas Sharma and Lucas Pavanelli
+Date: February 21st 2024
+Description:
+    Response Status Enum
+"""
+
+from enum import Enum
+from typing import Text
+
+
+class ResponseStatus(Text, Enum):
+    IN_PROGRESS = "IN_PROGRESS"
+    SUCCESS = "SUCCESS"
+    FAILED = "FAILED"
diff --git a/aixplain/modules/model/__init__.py b/aixplain/modules/model/__init__.py
index 41abf865..2ac8b37f 100644
--- a/aixplain/modules/model/__init__.py
+++ b/aixplain/modules/model/__init__.py
@@ -32,7 +32,7 @@
 from typing import Union, Optional, Text, Dict
 from datetime import datetime
 from aixplain.modules.model.response import ModelResponse
-from aixplain.enums import ModelStatus
+from aixplain.enums.response_status import ResponseStatus
 
 
 class Model(Asset):
@@ -118,7 +118,9 @@ def __repr__(self):
         except Exception:
             return f"<Model: {self.name} by {self.supplier}>"
 
-    def sync_poll(self, poll_url: Text, name: Text = "model_process", wait_time: float = 0.5, timeout: float = 300) -> Dict:
+    def sync_poll(
+        self, poll_url: Text, name: Text = "model_process", wait_time: float = 0.5, timeout: float = 300
+    ) -> ModelResponse:
         """Keeps polling the platform to check whether an asynchronous call is done.
 
         Args:
@@ -135,7 +137,7 @@ def sync_poll(self, poll_url: Text, name: Text = "model_process", wait_time: flo
         # keep wait time as 0.2 seconds the minimum
         wait_time = max(wait_time, 0.2)
         completed = False
-        response_body = {"status": "FAILED", "completed": False}
+        response_body = ModelResponse(status=ResponseStatus.FAILED, completed=False)
         while not completed and (end - start) < timeout:
             try:
                 response_body = self.poll(poll_url, name=name)
@@ -147,13 +149,17 @@
                 if wait_time < 60:
                     wait_time *= 1.1
             except Exception as e:
-                response_body = {"status": "FAILED", "completed": False, "error_message": "No response from the service."}
+                response_body = ModelResponse(
+                    status=ResponseStatus.FAILED, completed=False, error_message="No response from the service."
+                )
                 logging.error(f"Polling for Model: polling for {name}: {e}")
                 break
         if response_body["completed"] is True:
             logging.debug(f"Polling for Model: Final status of polling for {name}: {response_body}")
         else:
-            response_body["status"] = "FAILED"
+            response_body = ModelResponse(
+                status=ResponseStatus.FAILED, completed=False, error_message="No response from the service."
+ ) logging.error( f"Polling for Model: Final status of polling for {name}: No response in {timeout} seconds - {response_body}" ) @@ -174,11 +180,11 @@ def poll(self, poll_url: Text, name: Text = "model_process") -> ModelResponse: try: resp = r.json() if resp["completed"] is True: - status = ModelStatus.SUCCESS + status = ResponseStatus.SUCCESS if "error_message" in resp or "supplierError" in resp: - status = ModelStatus.FAILED + status = ResponseStatus.FAILED else: - status = ModelStatus.IN_PROGRESS + status = ResponseStatus.IN_PROGRESS logging.debug(f"Single Poll for Model: Status of polling for {name}: {resp}") return ModelResponse( status=resp.pop("status", status), @@ -195,7 +201,7 @@ def poll(self, poll_url: Text, name: Text = "model_process") -> ModelResponse: resp = {"status": "FAILED"} logging.error(f"Single Poll for Model: Error of polling for {name}: {e}") return ModelResponse( - status=ModelStatus.FAILED, + status=ResponseStatus.FAILED, error_message=str(e), completed=False, ) @@ -234,9 +240,9 @@ def run( msg = f"Error in request for {name} - {traceback.format_exc()}" logging.error(f"Model Run: Error in running for {name}: {e}") end = time.time() - response = {"status": "FAILED", "error": msg, "runTime": end - start} + response = {"status": "FAILED", "error_message": msg, "runTime": end - start} return ModelResponse( - status=response.pop("status", ModelStatus.FAILED), + status=response.pop("status", ResponseStatus.FAILED), data=response.pop("data", ""), details=response.pop("details", {}), completed=response.pop("completed", False), @@ -247,7 +253,9 @@ def run( **response, ) - def run_async(self, data: Union[Text, Dict], name: Text = "model_process", parameters: Optional[Dict] = {}) -> ModelResponse: + def run_async( + self, data: Union[Text, Dict], name: Text = "model_process", parameters: Optional[Dict] = {} + ) -> ModelResponse: """Runs asynchronously a model call. 
Args: @@ -263,7 +271,7 @@ def run_async(self, data: Union[Text, Dict], name: Text = "model_process", param payload = build_payload(data=data, parameters=parameters) response = call_run_endpoint(payload=payload, url=url, api_key=self.api_key) return ModelResponse( - status=response.pop("status", ModelStatus.FAILED), + status=response.pop("status", ResponseStatus.FAILED), data=response.pop("data", ""), details=response.pop("details", {}), completed=response.pop("completed", False), diff --git a/aixplain/modules/model/llm_model.py b/aixplain/modules/model/llm_model.py index 941c4a6f..48bfcc11 100644 --- a/aixplain/modules/model/llm_model.py +++ b/aixplain/modules/model/llm_model.py @@ -29,7 +29,7 @@ from aixplain.utils import config from typing import Union, Optional, List, Text, Dict from aixplain.modules.model.response import ModelResponse -from aixplain.enums import ModelStatus +from aixplain.enums.response_status import ResponseStatus class LLM(Model): @@ -152,7 +152,7 @@ def run( end = time.time() response = {"status": "FAILED", "error": msg, "elapsed_time": end - start} return ModelResponse( - status=response.pop("status", ModelStatus.FAILED), + status=response.pop("status", ResponseStatus.FAILED), data=response.pop("data", ""), details=response.pop("details", {}), completed=response.pop("completed", False), @@ -206,7 +206,7 @@ def run_async( payload = build_payload(data=data, parameters=parameters) response = call_run_endpoint(payload=payload, url=url, api_key=self.api_key) return ModelResponse( - status=response.pop("status", ModelStatus.FAILED), + status=response.pop("status", ResponseStatus.FAILED), data=response.pop("data", ""), details=response.pop("details", {}), completed=response.pop("completed", False), diff --git a/aixplain/modules/model/response.py b/aixplain/modules/model/response.py index 42ed09a4..94ddcb9d 100644 --- a/aixplain/modules/model/response.py +++ b/aixplain/modules/model/response.py @@ -1,15 +1,13 @@ -from dataclasses import dataclass from typing import Text, Any, Optional, Dict, List, Union -from aixplain.enums import ModelStatus +from aixplain.enums import ResponseStatus -@dataclass class ModelResponse: """ModelResponse class to store the response of the model run.""" def __init__( self, - status: ModelStatus, + status: ResponseStatus, data: Text = "", details: Optional[Union[Dict, List]] = {}, completed: bool = False, diff --git a/tests/unit/llm_test.py b/tests/unit/llm_test.py index 1329e136..073ed3ac 100644 --- a/tests/unit/llm_test.py +++ b/tests/unit/llm_test.py @@ -4,7 +4,7 @@ load_dotenv() from aixplain.utils import config -from aixplain.enums import ModelStatus +from aixplain.enums import ResponseStatus from aixplain.modules.model.response import ModelResponse from aixplain.modules import LLM @@ -85,7 +85,7 @@ def test_run_sync(): response = test_model.run(data=input_data, temperature=0.001, max_tokens=128, top_p=1.0) assert isinstance(response, ModelResponse) - assert response.status == ModelStatus.SUCCESS + assert response.status == ResponseStatus.SUCCESS assert response.data == "Test Model Result" assert response.completed is True assert response.used_credits == 0 diff --git a/tests/unit/model_test.py b/tests/unit/model_test.py index 94e2f6c2..9ddb6bc0 100644 --- a/tests/unit/model_test.py +++ b/tests/unit/model_test.py @@ -27,7 +27,7 @@ from aixplain.factories import ModelFactory from aixplain.enums import Function from urllib.parse import urljoin -from aixplain.enums import ModelStatus +from aixplain.enums import ResponseStatus from 
aixplain.modules.model.response import ModelResponse import pytest from unittest.mock import patch @@ -67,7 +67,7 @@ def test_call_run_endpoint_sync(): model_id = "model-id" execute_url = f"{base_url}/{model_id}".replace("/api/v1/execute", "/api/v2/execute") payload = {"data": "input_data"} - ref_response = {"completed": True, "status": ModelStatus.SUCCESS, "data": "Hello"} + ref_response = {"completed": True, "status": ResponseStatus.SUCCESS, "data": "Hello"} with requests_mock.Mocker() as mock: mock.post(execute_url, json=ref_response) @@ -88,7 +88,7 @@ def test_success_poll(): hyp_response = test_model.poll(poll_url=poll_url) assert isinstance(hyp_response, ModelResponse) assert hyp_response["completed"] == ref_response["completed"] - assert hyp_response["status"] == ModelStatus.SUCCESS + assert hyp_response["status"] == ResponseStatus.SUCCESS def test_failed_poll(): @@ -103,7 +103,7 @@ def test_failed_poll(): response = model.poll(poll_url=poll_url) assert isinstance(response, ModelResponse) - assert response.status == ModelStatus.FAILED + assert response.status == ResponseStatus.FAILED assert response.error_message == "Some error occurred" assert response.completed is True @@ -145,7 +145,7 @@ def test_run_async_errors(status_code, error_message): test_model = Model(id=model_id, name="Test Model", url=base_url) response = test_model.run_async(data="input_data") assert isinstance(response, ModelResponse) - assert response["status"] == ModelStatus.FAILED + assert response["status"] == ResponseStatus.FAILED assert response["error_message"] == error_message @@ -219,7 +219,7 @@ def test_run_sync(): response = test_model.run(data=input_data, name="test_run") assert isinstance(response, ModelResponse) - assert response.status == ModelStatus.SUCCESS + assert response.status == ResponseStatus.SUCCESS assert response.data == "Test Model Result" assert response.completed is True assert response.used_credits == 0 From b5e22858325f1d37102b75bd22fd261c4f67da48 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Mon, 11 Nov 2024 17:28:19 -0300 Subject: [PATCH 070/105] Make function filter in model search optional (#307) --- aixplain/factories/model_factory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aixplain/factories/model_factory.py b/aixplain/factories/model_factory.py index 209ff75d..b6588023 100644 --- a/aixplain/factories/model_factory.py +++ b/aixplain/factories/model_factory.py @@ -222,7 +222,7 @@ def _get_assets_from_page( @classmethod def list( cls, - function: Function, + function: Optional[Function] = None, query: Optional[Text] = "", suppliers: Optional[Union[Supplier, List[Supplier]]] = None, source_languages: Optional[Union[Language, List[Language]]] = None, From d2733307338dddd7d45767caac6ad19ae3efa0c9 Mon Sep 17 00:00:00 2001 From: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Date: Mon, 11 Nov 2024 18:25:52 -0300 Subject: [PATCH 071/105] Set default 'parameters' to None and adding tests (#300) Co-authored-by: Lucas Pavanelli --- .pre-commit-config.yaml | 2 +- aixplain/enums/supplier.py | 1 + aixplain/modules/model/__init__.py | 9 +- aixplain/modules/model/llm_model.py | 36 +-- aixplain/modules/model/utils.py | 7 +- tests/conftest.py | 4 + .../functional/agent/agent_functional_test.py | 17 +- tests/unit/llm_test.py | 67 +++++ tests/unit/model_test.py | 234 +++++++++++++++++- 9 files changed, 343 insertions(+), 34 deletions(-) create mode 100644 tests/conftest.py diff --git 
a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a79973ee..c6b06079 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ repos: hooks: - id: pytest-check name: pytest-check - entry: coverage run -m pytest tests/unit + entry: coverage run --source=. -m pytest tests/unit language: python pass_filenames: false types: [python] diff --git a/aixplain/enums/supplier.py b/aixplain/enums/supplier.py index 5d3e137d..ecc29998 100644 --- a/aixplain/enums/supplier.py +++ b/aixplain/enums/supplier.py @@ -48,6 +48,7 @@ def load_suppliers(): headers = {"x-aixplain-key": aixplain_key, "Content-Type": "application/json"} else: headers = {"x-api-key": api_key, "Content-Type": "application/json"} + logging.debug(f"Start service for GET API Creation - {url} - {headers}") r = _request_with_retry("get", url, headers=headers) if not 200 <= r.status_code < 300: raise Exception( diff --git a/aixplain/modules/model/__init__.py b/aixplain/modules/model/__init__.py index 2ac8b37f..104bcb62 100644 --- a/aixplain/modules/model/__init__.py +++ b/aixplain/modules/model/__init__.py @@ -106,6 +106,7 @@ def to_dict(self) -> Dict: return { "id": self.id, "name": self.name, + "description": self.description, "supplier": self.supplier, "additional_info": clean_additional_info, "input_params": self.input_params, @@ -211,7 +212,7 @@ def run( data: Union[Text, Dict], name: Text = "model_process", timeout: float = 300, - parameters: Optional[Dict] = {}, + parameters: Optional[Dict] = None, wait_time: float = 0.5, ) -> ModelResponse: """Runs a model call. @@ -220,7 +221,7 @@ def run( data (Union[Text, Dict]): link to the input data name (Text, optional): ID given to a call. Defaults to "model_process". timeout (float, optional): total polling time. Defaults to 300. - parameters (Dict, optional): optional parameters to the model. Defaults to "{}". + parameters (Dict, optional): optional parameters to the model. Defaults to None. wait_time (float, optional): wait time in seconds between polling calls. Defaults to 0.5. Returns: @@ -254,14 +255,14 @@ def run( ) def run_async( - self, data: Union[Text, Dict], name: Text = "model_process", parameters: Optional[Dict] = {} + self, data: Union[Text, Dict], name: Text = "model_process", parameters: Optional[Dict] = None ) -> ModelResponse: """Runs asynchronously a model call. Args: data (Union[Text, Dict]): link to the input data name (Text, optional): ID given to a call. Defaults to "model_process". - parameters (Dict, optional): optional parameters to the model. Defaults to "{}". + parameters (Dict, optional): optional parameters to the model. Defaults to None. Returns: dict: polling URL in response diff --git a/aixplain/modules/model/llm_model.py b/aixplain/modules/model/llm_model.py index 48bfcc11..600fd32e 100644 --- a/aixplain/modules/model/llm_model.py +++ b/aixplain/modules/model/llm_model.py @@ -104,7 +104,7 @@ def run( top_p: float = 1.0, name: Text = "model_process", timeout: float = 300, - parameters: Optional[Dict] = {}, + parameters: Optional[Dict] = None, wait_time: float = 0.5, ) -> ModelResponse: """Synchronously running a Large Language Model (LLM) model. @@ -119,21 +119,23 @@ def run( top_p (float, optional): Top P. Defaults to 1.0. name (Text, optional): ID given to a call. Defaults to "model_process". timeout (float, optional): total polling time. Defaults to 300. - parameters (Dict, optional): optional parameters to the model. Defaults to "{}". + parameters (Dict, optional): optional parameters to the model. Defaults to None. 
wait_time (float, optional): wait time in seconds between polling calls. Defaults to 0.5. Returns: Dict: parsed output from model """ start = time.time() + if parameters is None: + parameters = {} parameters.update( { - "context": parameters["context"] if "context" in parameters else context, - "prompt": parameters["prompt"] if "prompt" in parameters else prompt, - "history": parameters["history"] if "history" in parameters else history, - "temperature": parameters["temperature"] if "temperature" in parameters else temperature, - "max_tokens": parameters["max_tokens"] if "max_tokens" in parameters else max_tokens, - "top_p": parameters["top_p"] if "top_p" in parameters else top_p, + "context": parameters.get("context", context), + "prompt": parameters.get("prompt", prompt), + "history": parameters.get("history", history), + "temperature": parameters.get("temperature", temperature), + "max_tokens": parameters.get("max_tokens", max_tokens), + "top_p": parameters.get("top_p", top_p), } ) payload = build_payload(data=data, parameters=parameters) @@ -173,7 +175,7 @@ def run_async( max_tokens: int = 128, top_p: float = 1.0, name: Text = "model_process", - parameters: Optional[Dict] = {}, + parameters: Optional[Dict] = None, ) -> ModelResponse: """Runs asynchronously a model call. @@ -186,21 +188,23 @@ def run_async( max_tokens (int, optional): Maximum Generation Tokens. Defaults to 128. top_p (float, optional): Top P. Defaults to 1.0. name (Text, optional): ID given to a call. Defaults to "model_process". - parameters (Dict, optional): optional parameters to the model. Defaults to "{}". + parameters (Dict, optional): optional parameters to the model. Defaults to None. Returns: dict: polling URL in response """ url = f"{self.url}/{self.id}" logging.debug(f"Model Run Async: Start service for {name} - {url}") + if parameters is None: + parameters = {} parameters.update( { - "context": parameters["context"] if "context" in parameters else context, - "prompt": parameters["prompt"] if "prompt" in parameters else prompt, - "history": parameters["history"] if "history" in parameters else history, - "temperature": parameters["temperature"] if "temperature" in parameters else temperature, - "max_tokens": parameters["max_tokens"] if "max_tokens" in parameters else max_tokens, - "top_p": parameters["top_p"] if "top_p" in parameters else top_p, + "context": parameters.get("context", context), + "prompt": parameters.get("prompt", prompt), + "history": parameters.get("history", history), + "temperature": parameters.get("temperature", temperature), + "max_tokens": parameters.get("max_tokens", max_tokens), + "top_p": parameters.get("top_p", top_p), } ) payload = build_payload(data=data, parameters=parameters) diff --git a/aixplain/modules/model/utils.py b/aixplain/modules/model/utils.py index 2235b35a..13cc1f7c 100644 --- a/aixplain/modules/model/utils.py +++ b/aixplain/modules/model/utils.py @@ -3,12 +3,15 @@ import json import logging from aixplain.utils.file_utils import _request_with_retry -from typing import Dict, Text, Union +from typing import Dict, Text, Union, Optional -def build_payload(data: Union[Text, Dict], parameters: Dict = {}): +def build_payload(data: Union[Text, Dict], parameters: Optional[Dict] = None): from aixplain.factories import FileFactory + if parameters is None: + parameters = {} + data = FileFactory.to_link(data) if isinstance(data, dict): payload = data diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..a03eea30 --- /dev/null +++ 
b/tests/conftest.py @@ -0,0 +1,4 @@ +from dotenv import load_dotenv + +# Load environment variables once for all tests +load_dotenv() diff --git a/tests/functional/agent/agent_functional_test.py b/tests/functional/agent/agent_functional_test.py index 5b247728..55d671e0 100644 --- a/tests/functional/agent/agent_functional_test.py +++ b/tests/functional/agent/agent_functional_test.py @@ -39,6 +39,7 @@ def read_data(data_path): def run_input_map(request): return request.param + @pytest.fixture(scope="function") def delete_agents_and_team_agents(): for team_agent in TeamAgentFactory.list()["results"]: @@ -100,12 +101,8 @@ def test_list_agents(): assert type(agents_result) is list -def test_update_draft_agent(run_input_map): - for team in TeamAgentFactory.list()["results"]: - team.delete() - - for agent in AgentFactory.list()["results"]: - agent.delete() +def test_update_draft_agent(run_input_map, delete_agents_and_team_agents): + assert delete_agents_and_team_agents tools = [] if "model_tools" in run_input_map: @@ -137,7 +134,8 @@ def test_update_draft_agent(run_input_map): agent.delete() -def test_fail_non_existent_llm(): +def test_fail_non_existent_llm(delete_agents_and_team_agents): + assert delete_agents_and_team_agents with pytest.raises(Exception) as exc_info: AgentFactory.create( name="Test Agent", @@ -147,6 +145,7 @@ def test_fail_non_existent_llm(): ) assert str(exc_info.value) == "Large Language Model with ID 'non_existent_llm' not found." + def test_delete_agent_in_use(delete_agents_and_team_agents): assert delete_agents_and_team_agents agent = AgentFactory.create( @@ -160,7 +159,7 @@ def test_delete_agent_in_use(delete_agents_and_team_agents): description="Test description", use_mentalist_and_inspector=True, ) - + with pytest.raises(Exception) as exc_info: agent.delete() - assert str(exc_info.value) == "Agent Deletion Error (HTTP 403): err.agent_is_in_use." \ No newline at end of file + assert str(exc_info.value) == "Agent Deletion Error (HTTP 403): err.agent_is_in_use." 
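Why this patch moves `parameters` from a `{}` default to `Optional[Dict] = None`: Python evaluates default argument values once, at function-definition time, so a mutable `{}` default is a single shared dict that every call can mutate. A minimal sketch of the pitfall and of the `if parameters is None` guard adopted in `build_payload`, `run` and `run_async` above; illustrative only, not part of the patch (`run_stub` and `run_fixed` are hypothetical names, not SDK functions):

    def run_stub(data, parameters={}):  # bug: the same dict object is reused by every call
        parameters.setdefault("data", data)
        return parameters

    print(run_stub("a"))  # {'data': 'a'}
    print(run_stub("b"))  # {'data': 'a'}  <- stale value kept from the first call

    def run_fixed(data, parameters=None):  # the pattern this patch adopts
        if parameters is None:
            parameters = {}  # a fresh dict is created on each call
        parameters.setdefault("data", data)
        return parameters

    print(run_fixed("a"))  # {'data': 'a'}
    print(run_fixed("b"))  # {'data': 'b'}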
diff --git a/tests/unit/llm_test.py b/tests/unit/llm_test.py index 073ed3ac..753a8f7a 100644 --- a/tests/unit/llm_test.py +++ b/tests/unit/llm_test.py @@ -91,3 +91,70 @@ def test_run_sync(): assert response.used_credits == 0 assert response.run_time == 0 assert response.usage is None + + +@pytest.mark.skip(reason="Need to fix model response") +def test_run_sync_polling_error(): + """Test handling of polling errors in the run method""" + model_id = "test-model-id" + base_url = config.MODELS_RUN_URL + execute_url = f"{base_url}/{model_id}".replace("/api/v1/execute", "/api/v2/execute") + + ref_response = { + "status": "IN_PROGRESS", + "data": "https://models.aixplain.com/api/v1/data/invalid-id", + } + + with requests_mock.Mocker() as mock: + # Mock the initial execution call + mock.post(execute_url, json=ref_response) + + # Mock the polling URL to raise an exception + poll_url = ref_response["data"] + mock.get(poll_url, exc=Exception("Polling failed")) + + test_model = LLM(id=model_id, name="Test Model", function=Function.TEXT_GENERATION, url=base_url) + + response = test_model.run(data="test input") + + # Updated assertions to match ModelResponse structure + assert isinstance(response, ModelResponse) + assert response.status == ResponseStatus.FAILED + assert response.completed is False + assert "No response from the service" in response.error_message + assert response.data == "" + assert response.used_credits == 0 + assert response.run_time == 0 + assert response.usage is None + + +def test_run_with_custom_parameters(): + """Test run method with custom parameters""" + model_id = "test-model-id" + base_url = config.MODELS_RUN_URL + execute_url = f"{base_url}/{model_id}".replace("/api/v1/execute", "/api/v2/execute") + + ref_response = { + "completed": True, + "status": "SUCCESS", + "data": "Test Result", + "usedCredits": 10, + "runTime": 1.5, + "usage": {"prompt_tokens": 10, "completion_tokens": 20}, + } + + with requests_mock.Mocker() as mock: + mock.post(execute_url, json=ref_response) + + test_model = LLM(id=model_id, name="Test Model", function=Function.TEXT_GENERATION, url=base_url) + + custom_params = {"custom_param": "value", "temperature": 0.8} # This should override the default + + response = test_model.run(data="test input", temperature=0.5, parameters=custom_params) + + assert isinstance(response, ModelResponse) + assert response.status == ResponseStatus.SUCCESS + assert response.data == "Test Result" + assert response.used_credits == 10 + assert response.run_time == 1.5 + assert response.usage == {"prompt_tokens": 10, "completion_tokens": 20} diff --git a/tests/unit/model_test.py b/tests/unit/model_test.py index 9ddb6bc0..b45b6ae0 100644 --- a/tests/unit/model_test.py +++ b/tests/unit/model_test.py @@ -16,10 +16,8 @@ limitations under the License. 
""" -from dotenv import load_dotenv import requests_mock -load_dotenv() import json from aixplain.utils import config from aixplain.modules import Model @@ -31,6 +29,7 @@ from aixplain.modules.model.response import ModelResponse import pytest from unittest.mock import patch +from aixplain.enums.asset_status import AssetStatus def test_build_payload(): @@ -256,3 +255,234 @@ def test_sync_poll(): assert response["completed"] is True assert response["details"] == {"test": "test"} assert response["data"] == "Polling successful result" + + +def test_run_with_parameters(): + model_id = "test-model-id" + base_url = config.MODELS_RUN_URL + execute_url = f"{base_url}/{model_id}".replace("/api/v1/execute", "/api/v2/execute") + + input_data = "test input" + parameters = {"temperature": 0.7, "max_tokens": 100} + expected_payload = json.dumps({"data": input_data, **parameters}) + + ref_response = { + "completed": True, + "status": "SUCCESS", + "data": "Test Model Result", + "usedCredits": 0, + "runTime": 0, + } + + with requests_mock.Mocker() as mock: + mock.post(execute_url, json=ref_response) + + test_model = Model(id=model_id, name="Test Model", url=base_url, api_key=config.TEAM_API_KEY) + response = test_model.run(data=input_data, parameters=parameters) + + # Verify the payload was constructed correctly + assert mock.last_request.text == expected_payload + assert isinstance(response, ModelResponse) + assert response.status == ResponseStatus.SUCCESS + assert response.data == "Test Model Result" + + +def test_run_async_with_parameters(): + model_id = "test-model-id" + base_url = config.MODELS_RUN_URL + execute_url = f"{base_url}/{model_id}" + + input_data = "test input" + parameters = {"temperature": 0.7, "max_tokens": 100} + expected_payload = json.dumps({"data": input_data, **parameters}) + + ref_response = { + "completed": False, + "status": "IN_PROGRESS", + "data": "https://models.aixplain.com/api/v1/data/test-id", + "url": "https://models.aixplain.com/api/v1/data/test-id", + } + + with requests_mock.Mocker() as mock: + mock.post(execute_url, json=ref_response) + + test_model = Model(id=model_id, name="Test Model", url=base_url, api_key=config.TEAM_API_KEY) + response = test_model.run_async(data=input_data, parameters=parameters) + + # Verify the payload was constructed correctly + assert mock.last_request.text == expected_payload + assert isinstance(response, ModelResponse) + assert response.status == "IN_PROGRESS" + assert response.url == ref_response["url"] + + +def test_successful_delete(): + with requests_mock.Mocker() as mock: + model_id = "test-model-id" + url = urljoin(config.BACKEND_URL, f"sdk/models/{model_id}") + headers = {"Authorization": "Token " + config.TEAM_API_KEY, "Content-Type": "application/json"} + + # Mock successful deletion + mock.delete(url, status_code=200) + + test_model = Model(id=model_id, name="Test Model") + test_model.delete() # Should not raise any exception + + # Verify the request was made with correct headers + assert mock.last_request.headers["Authorization"] == headers["Authorization"] + assert mock.last_request.headers["Content-Type"] == headers["Content-Type"] + + +def test_failed_delete(): + with requests_mock.Mocker() as mock: + model_id = "test-model-id" + url = urljoin(config.BACKEND_URL, f"sdk/models/{model_id}") + + # Mock failed deletion + mock.delete(url, status_code=404) + + test_model = Model(id=model_id, name="Test Model") + + with pytest.raises(Exception) as excinfo: + test_model.delete() + + assert "Model Deletion Error: Make sure the model 
exists and you are the owner." in str(excinfo.value)
+
+
+def test_model_to_dict():
+    # Test with regular additional info
+    model = Model(id="test-id", name="Test Model", description="", additional_info={"key1": "value1", "key2": None})
+    result = model.to_dict()
+
+    # Basic assertions
+    assert result["id"] == "test-id"
+    assert result["name"] == "Test Model"
+    assert result["description"] == ""
+
+    # The additional_info is directly in the result
+    assert result["additional_info"] == {"additional_info": {"key1": "value1", "key2": None}}
+
+
+def test_model_repr():
+    # Test with supplier as dict
+    model1 = Model(id="test-id", name="Test Model", supplier={"name": "Test Supplier"})
+    assert repr(model1) == "<Model: Test Model by Test Supplier>"
+
+    # Test with supplier as string
+    model2 = Model(id="test-id", name="Test Model", supplier="Test Supplier")
+    assert str(model2) == "<Model: Test Model by Test Supplier>"
+
+
+def test_poll_with_error():
+    with requests_mock.Mocker() as mock:
+        poll_url = "https://models.aixplain.com/api/v1/data/test-id"
+        headers = {"x-api-key": config.TEAM_API_KEY, "Content-Type": "application/json"}
+
+        # Mock a response that will cause a JSON decode error
+        mock.get(poll_url, headers=headers, text="Invalid JSON")
+
+        model = Model(id="test-id", name="Test Model")
+        response = model.poll(poll_url=poll_url)
+
+        assert isinstance(response, ModelResponse)
+        assert response.status == ResponseStatus.FAILED
+        assert "Expecting value: line 1 column 1" in response.error_message
+
+
+def test_sync_poll_with_timeout():
+    poll_url = "https://models.aixplain.com/api/v1/data/test-id"
+    model = Model(id="test-id", name="Test Model")
+
+    # Mock poll method to always return not completed
+    with patch.object(model, "poll") as mock_poll:
+        mock_poll.return_value = {"status": "IN_PROGRESS", "completed": False, "error_message": ""}
+
+        # Test with very short timeout
+        response = model.sync_poll(poll_url=poll_url, timeout=0.1, wait_time=0.2)
+
+        assert response["status"] == "FAILED"
+        assert response["completed"] is False
+
+
+def test_check_finetune_status_error():
+    with requests_mock.Mocker() as mock:
+        model_id = "test-id"
+        url = urljoin(config.BACKEND_URL, f"sdk/finetune/{model_id}/ml-logs")
+        headers = {"x-api-key": config.TEAM_API_KEY, "Content-Type": "application/json"}
+
+        # Mock error response
+        error_response = {"statusCode": 404, "message": "Finetune not found"}
+        mock.get(url, headers=headers, json=error_response, status_code=404)
+
+        model = Model(id=model_id, name="Test Model")
+        status = model.check_finetune_status()
+
+        assert status is None
+
+
+def test_check_finetune_status_with_logs():
+    with requests_mock.Mocker() as mock:
+        model_id = "test-id"
+        url = urljoin(config.BACKEND_URL, f"sdk/finetune/{model_id}/ml-logs")
+
+        # Mock successful response with logs using valid ResponseStatus values
+        success_response = {
+            "finetuneStatus": AssetStatus.COMPLETED.value,
+            "modelStatus": AssetStatus.COMPLETED.value,
+            "logs": [{"epoch": 1.0, "trainLoss": 0.5, "evalLoss": 0.4}, {"epoch": 2.0, "trainLoss": 0.3, "evalLoss": 0.2}],
+        }
+        mock.get(url, json=success_response)
+
+        model = Model(id=model_id, name="Test Model", description="")
+
+        # Test with after_epoch
+        status = model.check_finetune_status(after_epoch=0)
+        assert status is not None
+        assert status.epoch == 1.0
+        assert status.training_loss == 0.5
+        assert status.validation_loss == 0.4
+
+        # Test without after_epoch
+        status = model.check_finetune_status()
+        assert status is not None
+        assert status.epoch == 2.0
+        assert status.training_loss == 0.3
+        assert status.validation_loss ==
0.2 + + +def test_check_finetune_status_partial_logs(): + with requests_mock.Mocker() as mock: + model_id = "test-id" + url = urljoin(config.BACKEND_URL, f"sdk/finetune/{model_id}/ml-logs") + + response = { + "finetuneStatus": AssetStatus.IN_PROGRESS.value, + "modelStatus": AssetStatus.IN_PROGRESS.value, + "logs": [{"epoch": 1.0, "trainLoss": 0.5, "evalLoss": 0.4}, {"epoch": 2.0, "trainLoss": 0.3, "evalLoss": 0.2}], + } + mock.get(url, json=response) + + model = Model(id=model_id, name="Test Model", description="") + status = model.check_finetune_status() + + assert status is not None + assert status.epoch == 2.0 + assert status.training_loss == 0.3 + assert status.validation_loss == 0.2 + + +def test_check_finetune_status_no_logs(): + with requests_mock.Mocker() as mock: + model_id = "test-id" + url = urljoin(config.BACKEND_URL, f"sdk/finetune/{model_id}/ml-logs") + + response = {"finetuneStatus": AssetStatus.IN_PROGRESS.value, "modelStatus": AssetStatus.IN_PROGRESS.value, "logs": []} + mock.get(url, json=response) + + model = Model(id=model_id, name="Test Model", description="") + status = model.check_finetune_status() + + assert status is not None + assert status.epoch is None + assert status.training_loss is None + assert status.validation_loss is None From 4eb3ac2d6edc44648bb0d4f5c0afba7968e58fbb Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Wed, 13 Nov 2024 17:05:47 -0300 Subject: [PATCH 072/105] Fixing pipeline poll bug (#309) --- aixplain/modules/pipeline/asset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aixplain/modules/pipeline/asset.py b/aixplain/modules/pipeline/asset.py index f9e29235..308f19b3 100644 --- a/aixplain/modules/pipeline/asset.py +++ b/aixplain/modules/pipeline/asset.py @@ -139,7 +139,7 @@ def poll(self, poll_url: Text, name: Text = "pipeline_process") -> Dict: r = _request_with_retry("get", poll_url, headers=headers) try: resp = r.json() - if isinstance(resp["data"], str): + if "data" in resp and isinstance(resp["data"], str): try: resp["data"] = json.loads(resp["data"])["response"] except Exception: From 332bd836ac73806bd3cd32b9d6508e553beb8034 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Wed, 20 Nov 2024 11:56:17 -0300 Subject: [PATCH 073/105] Eng-1070: agent/team's output format (#316) * Response Format * Response format to Output Format * Lowercase response format values * ResponseFormat to OutputFormat --- aixplain/modules/agent/__init__.py | 27 ++++++++++++++-------- aixplain/modules/agent/output_format.py | 30 +++++++++++++++++++++++++ aixplain/modules/team_agent/__init__.py | 26 +++++++++++++-------- tests/unit/agent_test.py | 18 +++++++++++++++ 4 files changed, 83 insertions(+), 18 deletions(-) create mode 100644 aixplain/modules/agent/output_format.py diff --git a/aixplain/modules/agent/__init__.py b/aixplain/modules/agent/__init__.py index 8d7391af..b7aad7aa 100644 --- a/aixplain/modules/agent/__init__.py +++ b/aixplain/modules/agent/__init__.py @@ -32,6 +32,7 @@ from aixplain.enums.asset_status import AssetStatus from aixplain.enums.storage_type import StorageType from aixplain.modules.model import Model +from aixplain.modules.agent.output_format import OutputFormat from aixplain.modules.agent.tool import Tool from aixplain.modules.agent.tool.model_tool import ModelTool from aixplain.modules.agent.tool.pipeline_tool import PipelineTool @@ -127,6 +128,7 @@ def run( content: 
Optional[Union[Dict[Text, Text], List[Text]]] = None,
         max_tokens: int = 2048,
         max_iterations: int = 10,
+        output_format: OutputFormat = OutputFormat.TEXT,
     ) -> Dict:
         """Runs an agent call.
 
@@ -142,7 +144,7 @@
             content (Union[Dict[Text, Text], List[Text]], optional): Content inputs to be processed according to the query. Defaults to None.
             max_tokens (int, optional): maximum number of tokens which can be generated by the agent. Defaults to 2048.
             max_iterations (int, optional): maximum number of iterations between the agent and the tools. Defaults to 10.
-
+            output_format (OutputFormat, optional): output format. Defaults to TEXT.
         Returns:
             Dict: parsed output from model
         """
@@ -158,6 +160,7 @@
             content=content,
             max_tokens=max_tokens,
             max_iterations=max_iterations,
+            output_format=output_format,
         )
         if response["status"] == "FAILED":
             end = time.time()
@@ -184,6 +187,7 @@
         content: Optional[Union[Dict[Text, Text], List[Text]]] = None,
         max_tokens: int = 2048,
         max_iterations: int = 10,
+        output_format: OutputFormat = OutputFormat.TEXT,
     ) -> Dict:
         """Runs asynchronously an agent call.
 
@@ -197,7 +201,7 @@
             content (Union[Dict[Text, Text], List[Text]], optional): Content inputs to be processed according to the query. Defaults to None.
             max_tokens (int, optional): maximum number of tokens which can be generated by the agent. Defaults to 2048.
             max_iterations (int, optional): maximum number of iterations between the agent and the tools. Defaults to 10.
-
+            output_format (OutputFormat, optional): output format. Defaults to TEXT.
         Returns:
             dict: polling URL in response
         """
@@ -234,13 +238,18 @@
         headers = {"x-api-key": self.api_key, "Content-Type": "application/json"}
 
-        payload = {"id": self.id, "query": FileFactory.to_link(query), "sessionId": session_id, "history": history}
-        parameters.update(
-            {
-                "max_tokens": parameters["max_tokens"] if "max_tokens" in parameters else max_tokens,
-                "max_iterations": parameters["max_iterations"] if "max_iterations" in parameters else max_iterations,
-            }
-        )
+        payload = {
+            "id": self.id,
+            "query": FileFactory.to_link(query),
+            "sessionId": session_id,
+            "history": history,
+            "executionParams": {
+                "maxTokens": parameters["max_tokens"] if "max_tokens" in parameters else max_tokens,
+                "maxIterations": parameters["max_iterations"] if "max_iterations" in parameters else max_iterations,
+                "outputFormat": output_format.value,
+            },
+        }
+        payload.update(parameters)
         payload = json.dumps(payload)
diff --git a/aixplain/modules/agent/output_format.py b/aixplain/modules/agent/output_format.py
new file mode 100644
index 00000000..3a53e2f8
--- /dev/null
+++ b/aixplain/modules/agent/output_format.py
@@ -0,0 +1,30 @@
+__author__ = "thiagocastroferreira"
+
+"""
+Copyright 2024 The aiXplain SDK authors
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Author: Duraikrishna Selvaraju, Thiago Castro Ferreira, Shreyas Sharma and Lucas Pavanelli
+Date: February 21st 2024
+Description:
+    Output Format Enum
+"""
+
+from enum import Enum
+from typing import Text
+
+
+class OutputFormat(Text, Enum):
+    MARKDOWN = "markdown"
+    TEXT = "text"
diff --git a/aixplain/modules/team_agent/__init__.py b/aixplain/modules/team_agent/__init__.py
index 2f8b5c3b..08d820f0 100644
--- a/aixplain/modules/team_agent/__init__.py
+++ b/aixplain/modules/team_agent/__init__.py
@@ -33,7 +33,7 @@
 from aixplain.enums.asset_status import AssetStatus
 from aixplain.enums.storage_type import StorageType
 from aixplain.modules.model import Model
-from aixplain.modules.agent import Agent
+from aixplain.modules.agent import Agent, OutputFormat
 from typing import Dict, List, Text, Optional, Union
 from urllib.parse import urljoin
 
@@ -112,6 +112,7 @@ def run(
         content: Optional[Union[Dict[Text, Text], List[Text]]] = None,
         max_tokens: int = 2048,
         max_iterations: int = 30,
+        output_format: OutputFormat = OutputFormat.TEXT,
     ) -> Dict:
         """Runs a team agent call.
 
@@ -127,6 +128,7 @@
             content (Union[Dict[Text, Text], List[Text]], optional): Content inputs to be processed according to the query. Defaults to None.
             max_tokens (int, optional): maximum number of tokens which can be generated by the agents. Defaults to 2048.
             max_iterations (int, optional): maximum number of iterations between the agents. Defaults to 30.
+            output_format (OutputFormat, optional): output format. Defaults to TEXT.
         Returns:
             Dict: parsed output from model
         """
@@ -142,6 +144,7 @@
             content=content,
             max_tokens=max_tokens,
             max_iterations=max_iterations,
+            output_format=output_format,
         )
         if response["status"] == "FAILED":
             end = time.time()
@@ -168,6 +171,7 @@
         content: Optional[Union[Dict[Text, Text], List[Text]]] = None,
         max_tokens: int = 2048,
         max_iterations: int = 30,
+        output_format: OutputFormat = OutputFormat.TEXT,
     ) -> Dict:
         """Runs asynchronously a Team Agent call.
 
@@ -181,7 +185,7 @@
             content (Union[Dict[Text, Text], List[Text]], optional): Content inputs to be processed according to the query. Defaults to None.
             max_tokens (int, optional): maximum number of tokens which can be generated by the agents. Defaults to 2048.
             max_iterations (int, optional): maximum number of iterations between the agents. Defaults to 30.
-
+            output_format (OutputFormat, optional): output format. Defaults to TEXT.
Returns: dict: polling URL in response """ @@ -218,13 +222,17 @@ def run_async( headers = {"x-api-key": self.api_key, "Content-Type": "application/json"} - payload = {"id": self.id, "query": FileFactory.to_link(query), "sessionId": session_id, "history": history} - parameters.update( - { - "max_tokens": parameters["max_tokens"] if "max_tokens" in parameters else max_tokens, - "max_iterations": parameters["max_iterations"] if "max_iterations" in parameters else max_iterations, - } - ) + payload = { + "id": self.id, + "query": FileFactory.to_link(query), + "sessionId": session_id, + "history": history, + "executionParams": { + "maxTokens": parameters["max_tokens"] if "max_tokens" in parameters else max_tokens, + "maxIterations": parameters["max_iterations"] if "max_iterations" in parameters else max_iterations, + "outputFormat": output_format.value, + }, + } payload.update(parameters) payload = json.dumps(payload) diff --git a/tests/unit/agent_test.py b/tests/unit/agent_test.py index 1b4fd929..9e38937f 100644 --- a/tests/unit/agent_test.py +++ b/tests/unit/agent_test.py @@ -2,6 +2,7 @@ import requests_mock from aixplain.enums.asset_status import AssetStatus from aixplain.modules import Agent +from aixplain.modules.agent import OutputFormat from aixplain.utils import config from aixplain.factories import AgentFactory from aixplain.modules.agent import PipelineTool, ModelTool @@ -226,3 +227,20 @@ def test_update_success(): assert agent.description == ref_response["description"] assert agent.llm_id == ref_response["llmId"] assert agent.tools[0].function.value == ref_response["assets"][0]["function"] + + +def test_run_success(): + agent = Agent("123", "Test Agent", "Sample Description") + url = urljoin(config.BACKEND_URL, f"sdk/agents/{agent.id}/run") + agent.url = url + with requests_mock.Mocker() as mock: + headers = {"x-aixplain-key": config.AIXPLAIN_API_KEY, "Content-Type": "application/json"} + + ref_response = {"data": "www.aixplain.com", "status": "IN_PROGRESS"} + mock.post(url, headers=headers, json=ref_response) + + response = agent.run_async( + data={"query": "Hello, how are you?"}, max_iterations=10, output_format=OutputFormat.MARKDOWN + ) + assert response["status"] == "IN_PROGRESS" + assert response["url"] == ref_response["data"] From 574398f76fc9173270a028d603b1c67bc0d5517b Mon Sep 17 00:00:00 2001 From: Shreyas Sharma <85180538+shreyasXplain@users.noreply.github.com> Date: Thu, 21 Nov 2024 23:25:55 +0530 Subject: [PATCH 074/105] Hotfix: Increase benchmark test timeout (#312) From f8ca14e034ece6bcf89b9d9b44548c3e367805f4 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Mon, 25 Nov 2024 09:40:41 -0300 Subject: [PATCH 075/105] Default parameter on ModelResponse.get (#319) --- aixplain/modules/model/response.py | 7 +++++-- tests/unit/model_test.py | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/aixplain/modules/model/response.py b/aixplain/modules/model/response.py index 94ddcb9d..99e32074 100644 --- a/aixplain/modules/model/response.py +++ b/aixplain/modules/model/response.py @@ -40,8 +40,11 @@ def __getitem__(self, key: Text) -> Any: return self.run_time raise KeyError(f"Key '{key}' not found in ModelResponse.") - def get(self, key: Text) -> Any: - return self[key] + def get(self, key: Text, default: Optional[Any] = None) -> Any: + try: + return self[key] + except KeyError: + return default def __repr__(self) -> str: fields = [] diff --git a/tests/unit/model_test.py b/tests/unit/model_test.py index 
b45b6ae0..a2463a8d 100644 --- a/tests/unit/model_test.py +++ b/tests/unit/model_test.py @@ -87,7 +87,7 @@ def test_success_poll(): hyp_response = test_model.poll(poll_url=poll_url) assert isinstance(hyp_response, ModelResponse) assert hyp_response["completed"] == ref_response["completed"] - assert hyp_response["status"] == ResponseStatus.SUCCESS + assert hyp_response.get("status") == ResponseStatus.SUCCESS def test_failed_poll(): From f4b251ae5787e0bbf465069195ebb08726ac3988 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Mon, 25 Nov 2024 09:41:05 -0300 Subject: [PATCH 076/105] ENG-1094: Validate prompt variables (#317) * Validate prompt variables * Isolate process variable process in a method --- aixplain/modules/agent/__init__.py | 6 +++++- aixplain/modules/agent/utils.py | 22 ++++++++++++++++++++++ aixplain/modules/team_agent/__init__.py | 6 +++++- tests/unit/agent_test.py | 21 +++++++++++++++++++++ 4 files changed, 53 insertions(+), 2 deletions(-) create mode 100644 aixplain/modules/agent/utils.py diff --git a/aixplain/modules/agent/__init__.py b/aixplain/modules/agent/__init__.py index b7aad7aa..c436b84a 100644 --- a/aixplain/modules/agent/__init__.py +++ b/aixplain/modules/agent/__init__.py @@ -36,6 +36,7 @@ from aixplain.modules.agent.tool import Tool from aixplain.modules.agent.tool.model_tool import ModelTool from aixplain.modules.agent.tool.pipeline_tool import PipelineTool +from aixplain.modules.agent.utils import process_variables from typing import Dict, List, Text, Optional, Union from urllib.parse import urljoin @@ -238,9 +239,12 @@ def run_async( headers = {"x-api-key": self.api_key, "Content-Type": "application/json"} + # build query + input_data = process_variables(query, data, parameters, self.description) + payload = { "id": self.id, - "query": FileFactory.to_link(query), + "query": input_data, "sessionId": session_id, "history": history, "executionParams": { diff --git a/aixplain/modules/agent/utils.py b/aixplain/modules/agent/utils.py new file mode 100644 index 00000000..03de61d1 --- /dev/null +++ b/aixplain/modules/agent/utils.py @@ -0,0 +1,22 @@ +from typing import Dict, Text, Union +import re + + +def process_variables(query: Text, data: Union[Dict, Text], parameters: Dict, agent_description: Text) -> Text: + from aixplain.factories.file_factory import FileFactory + + variables = re.findall(r"(? Date: Tue, 26 Nov 2024 00:10:39 +0300 Subject: [PATCH 077/105] added tests (#320) --- .../functional/agent/agent_functional_test.py | 43 ++++++++++++++ .../team_agent/team_agent_functional_test.py | 59 +++++++++++++++++++ 2 files changed, 102 insertions(+) diff --git a/tests/functional/agent/agent_functional_test.py b/tests/functional/agent/agent_functional_test.py index 55d671e0..3f54d470 100644 --- a/tests/functional/agent/agent_functional_test.py +++ b/tests/functional/agent/agent_functional_test.py @@ -163,3 +163,46 @@ def test_delete_agent_in_use(delete_agents_and_team_agents): with pytest.raises(Exception) as exc_info: agent.delete() assert str(exc_info.value) == "Agent Deletion Error (HTTP 403): err.agent_is_in_use." 
+ + +def test_update_tools_of_agent(run_input_map, delete_agents_and_team_agents): + assert delete_agents_and_team_agents + + agent = AgentFactory.create( + name=run_input_map["agent_name"], description=run_input_map["agent_name"], llm_id=run_input_map["llm_id"] + ) + assert agent is not None + assert agent.status == AssetStatus.DRAFT + assert len(agent.tools) == 0 + + tools = [] + if "model_tools" in run_input_map: + for tool in run_input_map["model_tools"]: + tool_ = copy.copy(tool) + for supplier in Supplier: + if tool["supplier"] is not None and tool["supplier"].lower() in [ + supplier.value["code"].lower(), + supplier.value["name"].lower(), + ]: + tool_["supplier"] = supplier + break + tools.append(AgentFactory.create_model_tool(**tool_)) + + if "pipeline_tools" in run_input_map: + for tool in run_input_map["pipeline_tools"]: + tools.append(AgentFactory.create_pipeline_tool(pipeline=tool["pipeline_id"], description=tool["description"])) + + agent.tools = tools + agent.update() + + agent = AgentFactory.get(agent.id) + assert len(agent.tools) == len(tools) + + removed_tool = agent.tools.pop() + agent.update() + + agent = AgentFactory.get(agent.id) + assert len(agent.tools) == len(tools) - 1 + assert removed_tool not in agent.tools + + agent.delete() diff --git a/tests/functional/team_agent/team_agent_functional_test.py b/tests/functional/team_agent/team_agent_functional_test.py index 44ea5dbc..e60e453a 100644 --- a/tests/functional/team_agent/team_agent_functional_test.py +++ b/tests/functional/team_agent/team_agent_functional_test.py @@ -160,3 +160,62 @@ def test_fail_non_existent_llm(): tools=[AgentFactory.create_model_tool(function=Function.TRANSLATION)], ) assert str(exc_info.value) == "Large Language Model with ID 'non_existent_llm' not found." 
+ +def test_add_remove_agents_from_team_agent(run_input_map, delete_agents_and_team_agents): + assert delete_agents_and_team_agents + + agents = [] + for agent in run_input_map["agents"]: + tools = [] + if "model_tools" in agent: + for tool in agent["model_tools"]: + tool_ = copy(tool) + for supplier in Supplier: + if tool["supplier"] is not None and tool["supplier"].lower() in [ + supplier.value["code"].lower(), + supplier.value["name"].lower(), + ]: + tool_["supplier"] = supplier + break + tools.append(AgentFactory.create_model_tool(**tool_)) + if "pipeline_tools" in agent: + for tool in agent["pipeline_tools"]: + tools.append(AgentFactory.create_pipeline_tool(pipeline=tool["pipeline_id"], description=tool["description"])) + + agent = AgentFactory.create( + name=agent["agent_name"], description=agent["agent_name"], llm_id=agent["llm_id"], tools=tools + ) + agents.append(agent) + + team_agent = TeamAgentFactory.create( + name=run_input_map["team_agent_name"], + agents=agents, + description=run_input_map["team_agent_name"], + llm_id=run_input_map["llm_id"], + use_mentalist_and_inspector=True, + ) + + assert team_agent is not None + assert team_agent.status == AssetStatus.DRAFT + + new_agent = AgentFactory.create( + name="New Agent", + description="Agent added to team", + llm_id=run_input_map["llm_id"], + ) + team_agent.agents.append(new_agent) + team_agent.update() + + team_agent = TeamAgentFactory.get(team_agent.id) + assert new_agent.id in [agent.id for agent in team_agent.agents] + assert len(team_agent.agents) == len(agents) + 1 + + removed_agent = team_agent.agents.pop(0) + team_agent.update() + + team_agent = TeamAgentFactory.get(team_agent.id) + assert removed_agent.id not in [agent.id for agent in team_agent.agents] + assert len(team_agent.agents) == len(agents) + + team_agent.delete() + new_agent.delete() From 93e24f3aadd7dcf80b3f2588ecaef760f85992aa Mon Sep 17 00:00:00 2001 From: Zaina Abu Shaban Date: Tue, 26 Nov 2024 00:44:12 +0300 Subject: [PATCH 078/105] added aixplain key (#314) * added aixplain key * fixed aixplain key * Added Exception * Remove unused file * fixed functional test bugs * Fixing headers --------- Co-authored-by: Thiago Castro Ferreira --- aixplain/decorators/api_key_checker.py | 2 +- aixplain/enums/function.py | 9 ++----- aixplain/enums/language.py | 8 ++----- aixplain/enums/license.py | 8 ++----- aixplain/enums/supplier.py | 8 ++----- aixplain/factories/agent_factory/__init__.py | 8 +++---- aixplain/factories/asset_factory.py | 2 +- aixplain/factories/benchmark_factory.py | 23 +++++------------- aixplain/factories/corpus_factory.py | 13 ++++------ aixplain/factories/data_factory.py | 7 ++---- aixplain/factories/dataset_factory.py | 14 ++++------- .../factories/finetune_factory/__init__.py | 1 - aixplain/factories/metric_factory.py | 11 ++------- aixplain/factories/model_factory.py | 13 ++++------ .../factories/pipeline_factory/__init__.py | 24 ++++--------------- .../factories/team_agent_factory/__init__.py | 7 ++---- aixplain/factories/wallet_factory.py | 1 - aixplain/modules/benchmark_job.py | 5 +--- aixplain/modules/pipeline/generate.py | 7 ++---- aixplain/utils/config.py | 9 +++++++ aixplain/utils/file_utils.py | 8 ++----- tests/unit/agent_test.py | 2 +- tests/unit/benchmark_test.py | 4 ++-- tests/unit/corpus_test.py | 4 ++-- tests/unit/dataset_test.py | 4 ++-- tests/unit/model_test.py | 4 ++-- tests/unit/pipeline_test.py | 2 +- 27 files changed, 66 insertions(+), 142 deletions(-) diff --git a/aixplain/decorators/api_key_checker.py 
b/aixplain/decorators/api_key_checker.py index d2611c0e..9fb317cb 100644 --- a/aixplain/decorators/api_key_checker.py +++ b/aixplain/decorators/api_key_checker.py @@ -3,7 +3,7 @@ def check_api_key(method): def wrapper(*args, **kwargs): - if config.TEAM_API_KEY == "": + if config.TEAM_API_KEY == "" and config.AIXPLAIN_API_KEY == "": raise Exception( "A 'TEAM_API_KEY' is required to run an asset. For help, please refer to the documentation (https://github.com/aixplain/aixplain#api-key-setup)" ) diff --git a/aixplain/enums/function.py b/aixplain/enums/function.py index a6d2e40a..67b5eba0 100644 --- a/aixplain/enums/function.py +++ b/aixplain/enums/function.py @@ -31,15 +31,11 @@ def load_functions(): api_key = config.TEAM_API_KEY - aixplain_key = config.AIXPLAIN_API_KEY backend_url = config.BACKEND_URL url = urljoin(backend_url, "sdk/functions") - if aixplain_key != "": - api_key = aixplain_key - headers = {"x-aixplain-key": aixplain_key, "Content-Type": "application/json"} - else: - headers = {"x-api-key": api_key, "Content-Type": "application/json"} + + headers = {"x-api-key": api_key, "Content-Type": "application/json"} r = _request_with_retry("get", url, headers=headers) if not 200 <= r.status_code < 300: raise Exception( @@ -61,5 +57,4 @@ def load_functions(): } return functions, functions_input_output - Function, FunctionInputOutput = load_functions() diff --git a/aixplain/enums/language.py b/aixplain/enums/language.py index 366d45f5..674940ab 100644 --- a/aixplain/enums/language.py +++ b/aixplain/enums/language.py @@ -31,15 +31,11 @@ def load_languages(): api_key = config.TEAM_API_KEY - aixplain_key = config.AIXPLAIN_API_KEY backend_url = config.BACKEND_URL url = urljoin(backend_url, "sdk/languages") - if aixplain_key != "": - api_key = aixplain_key - headers = {"x-aixplain-key": aixplain_key, "Content-Type": "application/json"} - else: - headers = {"x-api-key": api_key, "Content-Type": "application/json"} + + headers = {"x-api-key": api_key, "Content-Type": "application/json"} r = _request_with_retry("get", url, headers=headers) if not 200 <= r.status_code < 300: raise Exception( diff --git a/aixplain/enums/license.py b/aixplain/enums/license.py index 1943ec44..14527829 100644 --- a/aixplain/enums/license.py +++ b/aixplain/enums/license.py @@ -32,15 +32,11 @@ def load_licenses(): try: api_key = config.TEAM_API_KEY - aixplain_key = config.AIXPLAIN_API_KEY backend_url = config.BACKEND_URL url = urljoin(backend_url, "sdk/licenses") - if aixplain_key != "": - api_key = aixplain_key - headers = {"x-aixplain-key": aixplain_key, "Content-Type": "application/json"} - else: - headers = {"x-api-key": api_key, "Content-Type": "application/json"} + + headers = {"x-api-key": api_key, "Content-Type": "application/json"} r = _request_with_retry("get", url, headers=headers) if not 200 <= r.status_code < 300: raise Exception( diff --git a/aixplain/enums/supplier.py b/aixplain/enums/supplier.py index ecc29998..2bca01b1 100644 --- a/aixplain/enums/supplier.py +++ b/aixplain/enums/supplier.py @@ -39,15 +39,11 @@ def clean_name(name): def load_suppliers(): api_key = config.TEAM_API_KEY - aixplain_key = config.AIXPLAIN_API_KEY backend_url = config.BACKEND_URL url = urljoin(backend_url, "sdk/suppliers") - if aixplain_key != "": - api_key = aixplain_key - headers = {"x-aixplain-key": aixplain_key, "Content-Type": "application/json"} - else: - headers = {"x-api-key": api_key, "Content-Type": "application/json"} + + headers = {"x-api-key": api_key, "Content-Type": "application/json"} logging.debug(f"Start 
service for GET API Creation - {url} - {headers}") r = _request_with_retry("get", url, headers=headers) if not 200 <= r.status_code < 300: diff --git a/aixplain/factories/agent_factory/__init__.py b/aixplain/factories/agent_factory/__init__.py index c56d1fd8..39ae5678 100644 --- a/aixplain/factories/agent_factory/__init__.py +++ b/aixplain/factories/agent_factory/__init__.py @@ -176,11 +176,9 @@ def get(cls, agent_id: Text, api_key: Optional[Text] = None) -> Agent: from aixplain.factories.agent_factory.utils import build_agent url = urljoin(config.BACKEND_URL, f"sdk/agents/{agent_id}") - if config.AIXPLAIN_API_KEY != "": - headers = {"x-aixplain-key": f"{config.AIXPLAIN_API_KEY}", "Content-Type": "application/json"} - else: - api_key = api_key if api_key is not None else config.TEAM_API_KEY - headers = {"x-api-key": api_key, "Content-Type": "application/json"} + + api_key = api_key if api_key is not None else config.TEAM_API_KEY + headers = {"x-api-key": api_key, "Content-Type": "application/json"} logging.info(f"Start service for GET Agent - {url} - {headers}") r = _request_with_retry("get", url, headers=headers) resp = r.json() diff --git a/aixplain/factories/asset_factory.py b/aixplain/factories/asset_factory.py index 460f7cfa..51192b2a 100644 --- a/aixplain/factories/asset_factory.py +++ b/aixplain/factories/asset_factory.py @@ -28,7 +28,7 @@ class AssetFactory: - aixplain_key = config.AIXPLAIN_API_KEY + backend_url = config.BACKEND_URL @abstractmethod diff --git a/aixplain/factories/benchmark_factory.py b/aixplain/factories/benchmark_factory.py index 305fb5d9..743ed7fa 100644 --- a/aixplain/factories/benchmark_factory.py +++ b/aixplain/factories/benchmark_factory.py @@ -43,7 +43,7 @@ class BenchmarkFactory: backend_url (str): The URL for the backend. 
""" - aixplain_key = config.AIXPLAIN_API_KEY + backend_url = config.BACKEND_URL @classmethod @@ -69,10 +69,8 @@ def _get_benchmark_jobs_from_benchmark_id(cls, benchmark_id: Text) -> List[Bench List[BenchmarkJob]: List of associated benchmark jobs """ url = urljoin(cls.backend_url, f"sdk/benchmarks/{benchmark_id}/jobs") - if cls.aixplain_key != "": - headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} - else: - headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} r = _request_with_retry("get", url, headers=headers) resp = r.json() job_list = [cls._create_benchmark_job_from_response(job_info) for job_info in resp] @@ -107,10 +105,7 @@ def get(cls, benchmark_id: str) -> Benchmark: resp = None try: url = urljoin(cls.backend_url, f"sdk/benchmarks/{benchmark_id}") - if cls.aixplain_key != "": - headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} - else: - headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} logging.info(f"Start service for GET Benchmark - {url} - {headers}") r = _request_with_retry("get", url, headers=headers) resp = r.json() @@ -145,10 +140,7 @@ def get_job(cls, job_id: Text) -> BenchmarkJob: BenchmarkJob: Created 'BenchmarkJob' object """ url = urljoin(cls.backend_url, f"sdk/benchmarks/jobs/{job_id}") - if cls.aixplain_key != "": - headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} - else: - headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} r = _request_with_retry("get", url, headers=headers) resp = r.json() benchmarkJob = cls._create_benchmark_job_from_response(resp) @@ -235,10 +227,7 @@ def list_normalization_options(cls, metric: Metric, model: Model) -> List[str]: """ try: url = urljoin(cls.backend_url, "sdk/benchmarks/normalization-options") - if cls.aixplain_key != "": - headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} - else: - headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} payload = json.dumps({"metricId": metric.id, "modelIds": [model.id]}) r = _request_with_retry("post", url, headers=headers, data=payload) resp = r.json() diff --git a/aixplain/factories/corpus_factory.py b/aixplain/factories/corpus_factory.py index 3b9c5e4b..db7aa44e 100644 --- a/aixplain/factories/corpus_factory.py +++ b/aixplain/factories/corpus_factory.py @@ -48,7 +48,6 @@ class CorpusFactory(AssetFactory): - aixplain_key = config.AIXPLAIN_API_KEY backend_url = config.BACKEND_URL @classmethod @@ -117,10 +116,8 @@ def get(cls, corpus_id: Text) -> Corpus: """ try: url = urljoin(cls.backend_url, f"sdk/corpora/{corpus_id}/overview") - if cls.aixplain_key != "": - headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} - else: - headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} logging.info(f"Start service for GET 
Corpus - {url} - {headers}") r = _request_with_retry("get", url, headers=headers) resp = r.json() @@ -172,10 +169,8 @@ def list( Dict: list of corpora in agreement with the filters, page number, page total and total elements """ url = urljoin(cls.backend_url, "sdk/corpora/paginate") - if cls.aixplain_key != "": - headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} - else: - headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} assert 0 < page_size <= 100, "Corpus List Error: Page size must be greater than 0 and not exceed 100." payload = {"pageSize": page_size, "pageNumber": page_number, "sort": [{"field": "createdAt", "dir": -1}]} diff --git a/aixplain/factories/data_factory.py b/aixplain/factories/data_factory.py index 3f512aaf..1879b321 100644 --- a/aixplain/factories/data_factory.py +++ b/aixplain/factories/data_factory.py @@ -46,7 +46,6 @@ class DataFactory(AssetFactory): backend_url (str): The URL for the backend. """ - aixplain_key = config.AIXPLAIN_API_KEY backend_url = config.BACKEND_URL @classmethod @@ -92,10 +91,8 @@ def get(cls, data_id: Text) -> Data: Data: Created 'Data' object """ url = urljoin(cls.backend_url, f"sdk/data/{data_id}/overview") - if cls.aixplain_key != "": - headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} - else: - headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} logging.info(f"Start service for GET Data - {url} - {headers}") r = _request_with_retry("get", url, headers=headers) resp = r.json() diff --git a/aixplain/factories/dataset_factory.py b/aixplain/factories/dataset_factory.py index 081513c0..c7ccad70 100644 --- a/aixplain/factories/dataset_factory.py +++ b/aixplain/factories/dataset_factory.py @@ -57,7 +57,7 @@ class DatasetFactory(AssetFactory): backend_url (str): The URL for the backend. """ - aixplain_key = config.AIXPLAIN_API_KEY + backend_url = config.BACKEND_URL @classmethod @@ -164,10 +164,8 @@ def get(cls, dataset_id: Text) -> Dataset: """ try: url = urljoin(cls.backend_url, f"sdk/datasets/{dataset_id}/overview") - if cls.aixplain_key != "": - headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} - else: - headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} logging.info(f"Start service for GET Dataset - {url} - {headers}") r = _request_with_retry("get", url, headers=headers) resp = r.json() @@ -214,10 +212,8 @@ def list( Dict: list of datasets in agreement with the filters, page number, page total and total elements """ url = urljoin(cls.backend_url, "sdk/datasets/paginate") - if cls.aixplain_key != "": - headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} - else: - headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} assert 0 < page_size <= 100, "Dataset List Error: Page size must be greater than 0 and not exceed 100." 
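+        # build the pagination payload sent to sdk/datasets/paginate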
payload = { diff --git a/aixplain/factories/finetune_factory/__init__.py b/aixplain/factories/finetune_factory/__init__.py index 7a23c527..238d0d0c 100644 --- a/aixplain/factories/finetune_factory/__init__.py +++ b/aixplain/factories/finetune_factory/__init__.py @@ -44,7 +44,6 @@ class FinetuneFactory: backend_url (str): The URL for the backend. """ - aixplain_key = config.AIXPLAIN_API_KEY backend_url = config.BACKEND_URL @classmethod diff --git a/aixplain/factories/metric_factory.py b/aixplain/factories/metric_factory.py index a0372827..9f42fb3e 100644 --- a/aixplain/factories/metric_factory.py +++ b/aixplain/factories/metric_factory.py @@ -39,7 +39,6 @@ class MetricFactory: backend_url (str): The URL for the backend. """ - aixplain_key = config.AIXPLAIN_API_KEY backend_url = config.BACKEND_URL @classmethod @@ -76,10 +75,7 @@ def get(cls, metric_id: Text) -> Metric: resp, status_code = None, 200 try: url = urljoin(cls.backend_url, f"sdk/metrics/{metric_id}") - if cls.aixplain_key != "": - headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} - else: - headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} logging.info(f"Start service for GET Metric - {url} - {headers}") r = _request_with_retry("get", url, headers=headers) resp = r.json() @@ -126,10 +122,7 @@ def list( if is_reference_required is not None: filter_params["referenceRequired"] = 1 if is_reference_required else 0 - if cls.aixplain_key != "": - headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} - else: - headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} r = _request_with_retry("get", url, headers=headers, params=filter_params) resp = r.json() logging.info(f"Listing Metrics: Status of getting metrics: {resp}") diff --git a/aixplain/factories/model_factory.py b/aixplain/factories/model_factory.py index b6588023..052750a7 100644 --- a/aixplain/factories/model_factory.py +++ b/aixplain/factories/model_factory.py @@ -41,7 +41,6 @@ class ModelFactory: backend_url (str): The URL for the backend. 
""" - aixplain_key = config.AIXPLAIN_API_KEY backend_url = config.BACKEND_URL @classmethod @@ -107,10 +106,8 @@ def get(cls, model_id: Text, api_key: Optional[Text] = None) -> Model: resp = None try: url = urljoin(cls.backend_url, f"sdk/models/{model_id}") - if cls.aixplain_key != "": - headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} - else: - headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} logging.info(f"Start service for GET Model - {url} - {headers}") r = _request_with_retry("get", url, headers=headers) resp = r.json() @@ -196,10 +193,8 @@ def _get_assets_from_page( filter_params["sort"] = [{"dir": sort_order.value, "field": sort_by.value}] if len(lang_filter_params) != 0: filter_params["ioFilter"] = lang_filter_params - if cls.aixplain_key != "": - headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} - else: - headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} logging.info(f"Start service for POST Models Paginate - {url} - {headers} - {json.dumps(filter_params)}") r = _request_with_retry("post", url, headers=headers, json=filter_params) diff --git a/aixplain/factories/pipeline_factory/__init__.py b/aixplain/factories/pipeline_factory/__init__.py index ef330de0..f960d6da 100644 --- a/aixplain/factories/pipeline_factory/__init__.py +++ b/aixplain/factories/pipeline_factory/__init__.py @@ -43,7 +43,6 @@ class PipelineFactory: backend_url (str): The URL for the backend. """ - aixplain_key = config.AIXPLAIN_API_KEY backend_url = config.BACKEND_URL @classmethod @@ -65,11 +64,6 @@ def get(cls, pipeline_id: Text, api_key: Optional[Text] = None) -> Pipeline: "Authorization": f"Token {api_key}", "Content-Type": "application/json", } - elif cls.aixplain_key != "": - headers = { - "x-aixplain-key": f"{cls.aixplain_key}", - "Content-Type": "application/json", - } else: headers = { "Authorization": f"Token {config.TEAM_API_KEY}", @@ -125,13 +119,8 @@ def get_assets_from_page(cls, page_number: int) -> List[Pipeline]: """ try: url = urljoin(cls.backend_url, f"sdk/pipelines/?pageNumber={page_number}") - if cls.aixplain_key != "": - headers = { - "x-aixplain-key": f"{cls.aixplain_key}", - "Content-Type": "application/json", - } - else: - headers = { + + headers = { "Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json", } @@ -181,13 +170,8 @@ def list( ) -> Dict: url = urljoin(cls.backend_url, "sdk/pipelines/paginate") - if cls.aixplain_key != "": - headers = { - "x-aixplain-key": f"{cls.aixplain_key}", - "Content-Type": "application/json", - } - else: - headers = { + + headers = { "Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json", } diff --git a/aixplain/factories/team_agent_factory/__init__.py b/aixplain/factories/team_agent_factory/__init__.py index 3f65b4b0..0819d989 100644 --- a/aixplain/factories/team_agent_factory/__init__.py +++ b/aixplain/factories/team_agent_factory/__init__.py @@ -153,11 +153,8 @@ def list(cls) -> Dict: def get(cls, agent_id: Text, api_key: Optional[Text] = None) -> TeamAgent: """Get agent by id.""" url = urljoin(config.BACKEND_URL, f"sdk/agent-communities/{agent_id}") - if config.AIXPLAIN_API_KEY != "": - headers = {"x-aixplain-key": 
f"{config.AIXPLAIN_API_KEY}", "Content-Type": "application/json"} - else: - api_key = api_key if api_key is not None else config.TEAM_API_KEY - headers = {"x-api-key": api_key, "Content-Type": "application/json"} + api_key = api_key if api_key is not None else config.TEAM_API_KEY + headers = {"x-api-key": api_key, "Content-Type": "application/json"} logging.info(f"Start service for GET Team Agent - {url} - {headers}") try: r = _request_with_retry("get", url, headers=headers) diff --git a/aixplain/factories/wallet_factory.py b/aixplain/factories/wallet_factory.py index 01c0ac2e..1591dc2e 100644 --- a/aixplain/factories/wallet_factory.py +++ b/aixplain/factories/wallet_factory.py @@ -6,7 +6,6 @@ class WalletFactory: - aixplain_key = config.AIXPLAIN_API_KEY backend_url = config.BACKEND_URL @classmethod diff --git a/aixplain/modules/benchmark_job.py b/aixplain/modules/benchmark_job.py index c06063fc..8fe13a19 100644 --- a/aixplain/modules/benchmark_job.py +++ b/aixplain/modules/benchmark_job.py @@ -40,10 +40,7 @@ def _create_benchmark_job_from_response(cls, response: Dict): @classmethod def _fetch_current_response(cls, job_id: Text) -> dict: url = urljoin(config.BACKEND_URL, f"sdk/benchmarks/jobs/{job_id}") - if config.AIXPLAIN_API_KEY != "": - headers = {"x-aixplain-key": f"{config.AIXPLAIN_API_KEY}", "Content-Type": "application/json"} - else: - headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} r = _request_with_retry("get", url, headers=headers) resp = r.json() return resp diff --git a/aixplain/modules/pipeline/generate.py b/aixplain/modules/pipeline/generate.py index 46c95482..8bfeecb3 100644 --- a/aixplain/modules/pipeline/generate.py +++ b/aixplain/modules/pipeline/generate.py @@ -103,7 +103,7 @@ def fetch_functions(): Fetch functions from the backend """ api_key = config.TEAM_API_KEY - aixplain_key = config.AIXPLAIN_API_KEY + backend_url = config.BACKEND_URL url = urljoin(backend_url, "sdk/functions") @@ -111,10 +111,7 @@ def fetch_functions(): "Content-Type": "application/json", } - if aixplain_key: - headers["x-aixplain-key"] = aixplain_key - else: - headers["x-api-key"] = api_key + headers["x-api-key"] = api_key r = requests.get(url, headers=headers) try: diff --git a/aixplain/utils/config.py b/aixplain/utils/config.py index 03bbdccf..b47bc4f7 100644 --- a/aixplain/utils/config.py +++ b/aixplain/utils/config.py @@ -23,6 +23,15 @@ # GET THE API KEY FROM CMD TEAM_API_KEY = os.getenv("TEAM_API_KEY", "") AIXPLAIN_API_KEY = os.getenv("AIXPLAIN_API_KEY", "") + +if AIXPLAIN_API_KEY and TEAM_API_KEY: + if AIXPLAIN_API_KEY != TEAM_API_KEY: + raise Exception("Conflicting API keys: 'AIXPLAIN_API_KEY' and 'TEAM_API_KEY' are both provided but do not match. 
Please provide only one API key.") + + +if AIXPLAIN_API_KEY and not TEAM_API_KEY: + TEAM_API_KEY = AIXPLAIN_API_KEY + PIPELINE_API_KEY = os.getenv("PIPELINE_API_KEY", "") MODEL_API_KEY = os.getenv("MODEL_API_KEY", "") LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO") diff --git a/aixplain/utils/file_utils.py b/aixplain/utils/file_utils.py index 02ddc5ef..0e617397 100644 --- a/aixplain/utils/file_utils.py +++ b/aixplain/utils/file_utils.py @@ -122,12 +122,8 @@ def upload_data( tags = [] payload = {"contentType": content_type, "originalName": file_name, "tags": ",".join(tags), "license": license.value} - if config.AIXPLAIN_API_KEY != "": - team_key = config.AIXPLAIN_API_KEY - headers = {"x-aixplain-key": team_key} - else: - team_key = config.TEAM_API_KEY - headers = {"Authorization": "token " + team_key} + team_key = config.TEAM_API_KEY + headers = {"Authorization": "token " + team_key} r = _request_with_retry("post", url, headers=headers, data=payload) response = r.json() diff --git a/tests/unit/agent_test.py b/tests/unit/agent_test.py index cb60c620..ce1eac63 100644 --- a/tests/unit/agent_test.py +++ b/tests/unit/agent_test.py @@ -235,7 +235,7 @@ def test_run_success(): url = urljoin(config.BACKEND_URL, f"sdk/agents/{agent.id}/run") agent.url = url with requests_mock.Mocker() as mock: - headers = {"x-aixplain-key": config.AIXPLAIN_API_KEY, "Content-Type": "application/json"} + headers = {"x-api-key": config.AIXPLAIN_API_KEY, "Content-Type": "application/json"} ref_response = {"data": "www.aixplain.com", "status": "IN_PROGRESS"} mock.post(url, headers=headers, json=ref_response) diff --git a/tests/unit/benchmark_test.py b/tests/unit/benchmark_test.py index 167e4bcb..08a91ea3 100644 --- a/tests/unit/benchmark_test.py +++ b/tests/unit/benchmark_test.py @@ -42,7 +42,7 @@ def test_get_benchmark_error(): with requests_mock.Mocker() as mock: benchmark_id = "test-benchmark-id" url = urljoin(config.BACKEND_URL, f"sdk/benchmarks/{benchmark_id}") - headers = {"x-aixplain-key": config.AIXPLAIN_API_KEY, "Content-Type": "application/json"} + headers = {"Authorization": f"Token {config.AIXPLAIN_API_KEY}", "Content-Type": "application/json"} error_response = {"statusCode": 404, "message": "Benchmark not found"} mock.get(url, headers=headers, json=error_response, status_code=404) @@ -59,7 +59,7 @@ def test_list_normalization_options_error(): model = Model(id="model1", name="Test Model", description="Test model", supplier="Test supplier", cost=10, version="v1") url = urljoin(config.BACKEND_URL, "sdk/benchmarks/normalization-options") - headers = {"x-aixplain-key": config.AIXPLAIN_API_KEY, "Content-Type": "application/json"} + headers = {"Authorization": f"Token {config.AIXPLAIN_API_KEY}", "Content-Type": "application/json"} error_response = {"message": "Internal Server Error"} mock.post(url, headers=headers, json=error_response, status_code=500) diff --git a/tests/unit/corpus_test.py b/tests/unit/corpus_test.py index 07522c4d..bc240382 100644 --- a/tests/unit/corpus_test.py +++ b/tests/unit/corpus_test.py @@ -9,7 +9,7 @@ def test_get_corpus_error_response(): with requests_mock.Mocker() as mock: corpus_id = "invalid_corpus_id" url = urljoin(config.BACKEND_URL, f"sdk/corpora/{corpus_id}/overview") - headers = {"x-aixplain-key": config.AIXPLAIN_API_KEY, "Content-Type": "application/json"} + headers = {"Authorization": f"Token {config.AIXPLAIN_API_KEY}", "Content-Type": "application/json"} error_response = {"message": "Not Found"} mock.get(url, headers=headers, json=error_response, status_code=404) @@ -23,7 +23,7 @@ 
def test_get_corpus_error_response(): def test_list_corpus_error_response(): with requests_mock.Mocker() as mock: url = urljoin(config.BACKEND_URL, "sdk/corpora/paginate") - headers = {"x-aixplain-key": config.AIXPLAIN_API_KEY, "Content-Type": "application/json"} + headers = {"Authorization": f"Token {config.AIXPLAIN_API_KEY}", "Content-Type": "application/json"} error_response = {"message": "Internal Server Error"} mock.post(url, headers=headers, json=error_response, status_code=500) diff --git a/tests/unit/dataset_test.py b/tests/unit/dataset_test.py index 25c57123..721a405c 100644 --- a/tests/unit/dataset_test.py +++ b/tests/unit/dataset_test.py @@ -8,7 +8,7 @@ def test_list_dataset_error_response(): with requests_mock.Mocker() as mock: url = urljoin(config.BACKEND_URL, "sdk/datasets/paginate") - headers = {"x-aixplain-key": config.AIXPLAIN_API_KEY, "Content-Type": "application/json"} + headers = {"Authorization": f"Token {config.AIXPLAIN_API_KEY}", "Content-Type": "application/json"} error_response = {"message": "Internal Server Error"} mock.post(url, headers=headers, json=error_response, status_code=500) @@ -23,7 +23,7 @@ def test_get_dataset_error_response(): with requests_mock.Mocker() as mock: dataset_id = "invalid_dataset_id" url = urljoin(config.BACKEND_URL, f"sdk/datasets/{dataset_id}/overview") - headers = {"x-aixplain-key": config.AIXPLAIN_API_KEY, "Content-Type": "application/json"} + headers = {"Authorization": f"Token {config.AIXPLAIN_API_KEY}", "Content-Type": "application/json"} error_response = {"message": "Not Found"} mock.get(url, headers=headers, json=error_response, status_code=404) diff --git a/tests/unit/model_test.py b/tests/unit/model_test.py index a2463a8d..452d9ac5 100644 --- a/tests/unit/model_test.py +++ b/tests/unit/model_test.py @@ -152,7 +152,7 @@ def test_get_model_error_response(): with requests_mock.Mocker() as mock: model_id = "test-model-id" url = urljoin(config.BACKEND_URL, f"sdk/models/{model_id}") - headers = {"x-aixplain-key": config.AIXPLAIN_API_KEY, "Content-Type": "application/json"} + headers = {"Authorization": f"Token {config.AIXPLAIN_API_KEY}", "Content-Type": "application/json"} error_response = {"statusCode": 404, "message": "Model not found"} mock.get(url, headers=headers, json=error_response, status_code=404) @@ -169,7 +169,7 @@ def test_get_assets_from_page_error(): page_number = 0 page_size = 2 url = urljoin(config.BACKEND_URL, "sdk/models/paginate") - headers = {"x-aixplain-key": config.AIXPLAIN_API_KEY, "Content-Type": "application/json"} + headers = {"Authorization": f"Token {config.AIXPLAIN_API_KEY}", "Content-Type": "application/json"} error_response = {"statusCode": 500, "message": "Internal Server Error"} mock.post(url, headers=headers, json=error_response, status_code=500) diff --git a/tests/unit/pipeline_test.py b/tests/unit/pipeline_test.py index 05ee7172..d1b0f9b2 100644 --- a/tests/unit/pipeline_test.py +++ b/tests/unit/pipeline_test.py @@ -72,7 +72,7 @@ def test_list_pipelines_error_response(): page_number = 0 page_size = 20 url = urljoin(config.BACKEND_URL, "sdk/pipelines/paginate") - headers = {"x-aixplain-key": config.AIXPLAIN_API_KEY, "Content-Type": "application/json"} + headers = {"Authorization": f"Token {config.AIXPLAIN_API_KEY}", "Content-Type": "application/json"} error_response = {"statusCode": 400, "message": "Bad Request"} mock.post(url, headers=headers, json=error_response, status_code=400) From 540b0a198795ad344d38a9e3cb4c328b787b7879 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira 
<85182544+thiago-aixplain@users.noreply.github.com> Date: Wed, 27 Nov 2024 16:29:57 -0300 Subject: [PATCH 079/105] ENG-1110: Onboard Utilities (#321) * Onboard Utilities * Tests for utility models --- aixplain/enums/data_type.py | 1 + .../__init__.py} | 175 +++++------------ aixplain/factories/model_factory/utils.py | 142 ++++++++++++++ aixplain/modules/model/utility_model.py | 184 ++++++++++++++++++ .../model/run_utility_model_test.py | 33 ++++ tests/unit/model_test.py | 4 +- tests/unit/utility_test.py | 99 ++++++++++ 7 files changed, 509 insertions(+), 129 deletions(-) rename aixplain/factories/{model_factory.py => model_factory/__init__.py} (74%) create mode 100644 aixplain/factories/model_factory/utils.py create mode 100644 aixplain/modules/model/utility_model.py create mode 100644 tests/functional/model/run_utility_model_test.py create mode 100644 tests/unit/utility_test.py diff --git a/aixplain/enums/data_type.py b/aixplain/enums/data_type.py index 11432bcf..dcae0422 100644 --- a/aixplain/enums/data_type.py +++ b/aixplain/enums/data_type.py @@ -35,6 +35,7 @@ class DataType(str, Enum): VIDEO = "video" EMBEDDING = "embedding" NUMBER = "number" + BOOLEAN = "boolean" def __str__(self): return self._value_ diff --git a/aixplain/factories/model_factory.py b/aixplain/factories/model_factory/__init__.py similarity index 74% rename from aixplain/factories/model_factory.py rename to aixplain/factories/model_factory/__init__.py index 052750a7..8ec3183b 100644 --- a/aixplain/factories/model_factory.py +++ b/aixplain/factories/model_factory/__init__.py @@ -24,14 +24,11 @@ import json import logging from aixplain.modules.model import Model -from aixplain.modules.model.llm_model import LLM +from aixplain.modules.model.utility_model import UtilityModel, UtilityModelInput from aixplain.enums import Function, Language, OwnershipType, Supplier, SortBy, SortOrder from aixplain.utils import config from aixplain.utils.file_utils import _request_with_retry from urllib.parse import urljoin -from warnings import warn -from aixplain.enums.function import FunctionInputOutput -from datetime import datetime class ModelFactory: @@ -44,53 +41,52 @@ class ModelFactory: backend_url = config.BACKEND_URL @classmethod - def _create_model_from_response(cls, response: Dict) -> Model: - """Converts response Json to 'Model' object + def create_utility_model( + cls, name: Text, description: Text, inputs: List[UtilityModelInput], code: Text, output_description: Text + ) -> UtilityModel: + """Create a utility model Args: - response (Dict): Json from API + name (Text): name of the model + description (Text): description of the model + inputs (List[UtilityModelInput]): inputs of the model + code (Text): code of the model + output_description (Text): description of the output Returns: - Model: Coverted 'Model' object + UtilityModel: created utility model """ - if "api_key" not in response: - response["api_key"] = config.TEAM_API_KEY - - parameters = {} - if "params" in response: - for param in response["params"]: - if "language" in param["name"]: - parameters[param["name"]] = [w["value"] for w in param["values"]] - - function = Function(response["function"]["id"]) - ModelClass = Model - if function == Function.TEXT_GENERATION: - ModelClass = LLM - - created_at = None - if "createdAt" in response and response["createdAt"]: - created_at = datetime.fromisoformat(response["createdAt"].replace("Z", "+00:00")) - function_id = response["function"]["id"] - function = Function(function_id) - function_io = 
FunctionInputOutput.get(function_id, None) - input_params = {param["code"]: param for param in function_io["spec"]["params"]} - output_params = {param["code"]: param for param in function_io["spec"]["output"]} - - return ModelClass( - response["id"], - response["name"], - description=response.get("description", ""), - supplier=response["supplier"], - api_key=response["api_key"], - cost=response["pricing"], - function=function, - created_at=created_at, - parameters=parameters, - input_params=input_params, - output_params=output_params, - is_subscribed=True if "subscription" in response else False, - version=response["version"]["id"], + utility_model = UtilityModel( + id="", + name=name, + description=description, + inputs=inputs, + code=code, + function=Function.UTILITIES, + api_key=config.TEAM_API_KEY, + output_description=output_description, ) + payload = utility_model.to_dict() + url = urljoin(cls.backend_url, "sdk/utilities") + headers = {"x-api-key": f"{config.TEAM_API_KEY}", "Content-Type": "application/json"} + try: + logging.info(f"Start service for POST Utility Model - {url} - {headers} - {payload}") + r = _request_with_retry("post", url, headers=headers, json=payload) + resp = r.json() + except Exception as e: + logging.error(f"Error creating utility model: {e}") + raise e + + if 200 <= r.status_code < 300: + utility_model.id = resp["id"] + logging.info(f"Utility Model Creation: Model {utility_model.id} instantiated.") + return utility_model + else: + error_message = ( + f"Utility Model Creation: Failed to create utility model. Status Code: {r.status_code}. Error: {resp}" + ) + logging.error(error_message) + raise Exception(error_message) @classmethod def get(cls, model_id: Text, api_key: Optional[Text] = None) -> Model: @@ -125,7 +121,9 @@ def get(cls, model_id: Text, api_key: Optional[Text] = None) -> Model: resp["api_key"] = config.TEAM_API_KEY if api_key is not None: resp["api_key"] = api_key - model = cls._create_model_from_response(resp) + from aixplain.factories.model_factory.utils import create_model_from_response + + model = create_model_from_response(resp) logging.info(f"Model Creation: Model {model_id} instantiated.") return model else: @@ -133,87 +131,6 @@ def get(cls, model_id: Text, api_key: Optional[Text] = None) -> Model: logging.error(error_message) raise Exception(error_message) - @classmethod - def create_asset_from_id(cls, model_id: Text) -> Model: - warn( - 'This method will be deprecated in the next versions of the SDK. 
Use "get" instead.', - DeprecationWarning, - stacklevel=2, - ) - return cls.get(model_id) - - @classmethod - def _get_assets_from_page( - cls, - query, - page_number: int, - page_size: int, - function: Function, - suppliers: Union[Supplier, List[Supplier]], - source_languages: Union[Language, List[Language]], - target_languages: Union[Language, List[Language]], - is_finetunable: bool = None, - ownership: Optional[Tuple[OwnershipType, List[OwnershipType]]] = None, - sort_by: Optional[SortBy] = None, - sort_order: SortOrder = SortOrder.ASCENDING, - ) -> List[Model]: - try: - url = urljoin(cls.backend_url, "sdk/models/paginate") - filter_params = {"q": query, "pageNumber": page_number, "pageSize": page_size} - if is_finetunable is not None: - filter_params["isFineTunable"] = is_finetunable - if function is not None: - filter_params["functions"] = [function.value] - if suppliers is not None: - if isinstance(suppliers, Supplier) is True: - suppliers = [suppliers] - filter_params["suppliers"] = [supplier.value["id"] for supplier in suppliers] - if ownership is not None: - if isinstance(ownership, OwnershipType) is True: - ownership = [ownership] - filter_params["ownership"] = [ownership_.value for ownership_ in ownership] - - lang_filter_params = [] - if source_languages is not None: - if isinstance(source_languages, Language): - source_languages = [source_languages] - if function == Function.TRANSLATION: - lang_filter_params.append({"code": "sourcelanguage", "value": source_languages[0].value["language"]}) - else: - lang_filter_params.append({"code": "language", "value": source_languages[0].value["language"]}) - if source_languages[0].value["dialect"] != "": - lang_filter_params.append({"code": "dialect", "value": source_languages[0].value["dialect"]}) - if target_languages is not None: - if isinstance(target_languages, Language): - target_languages = [target_languages] - if function == Function.TRANSLATION: - code = "targetlanguage" - lang_filter_params.append({"code": code, "value": target_languages[0].value["language"]}) - if sort_by is not None: - filter_params["sort"] = [{"dir": sort_order.value, "field": sort_by.value}] - if len(lang_filter_params) != 0: - filter_params["ioFilter"] = lang_filter_params - - headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} - - logging.info(f"Start service for POST Models Paginate - {url} - {headers} - {json.dumps(filter_params)}") - r = _request_with_retry("post", url, headers=headers, json=filter_params) - resp = r.json() - - except Exception as e: - error_message = f"Listing Models: Error in getting Models on Page {page_number}: {e}" - logging.error(error_message, exc_info=True) - return [] - if 200 <= r.status_code < 300: - logging.info(f"Listing Models: Status of getting Models on Page {page_number}: {r.status_code}") - all_models = resp["items"] - model_list = [cls._create_model_from_response(model_info_json) for model_info_json in all_models] - return model_list, resp["total"] - else: - error_message = f"Listing Models Error: Failed to retrieve models. Status Code: {r.status_code}. 
Error: {resp}" - logging.error(error_message) - raise Exception(error_message) - @classmethod def list( cls, @@ -244,7 +161,9 @@ def list( Returns: List[Model]: List of models based on given filters """ - models, total = cls._get_assets_from_page( + from aixplain.factories.model_factory.utils import get_assets_from_page + + models, total = get_assets_from_page( query, page_number, page_size, diff --git a/aixplain/factories/model_factory/utils.py b/aixplain/factories/model_factory/utils.py new file mode 100644 index 00000000..01423795 --- /dev/null +++ b/aixplain/factories/model_factory/utils.py @@ -0,0 +1,142 @@ +import json +import logging +from aixplain.modules.model import Model +from aixplain.modules.model.llm_model import LLM +from aixplain.modules.model.utility_model import UtilityModel, UtilityModelInput +from aixplain.enums import DataType, Function, Language, OwnershipType, Supplier, SortBy, SortOrder +from aixplain.utils import config +from aixplain.utils.file_utils import _request_with_retry +from aixplain.enums.function import FunctionInputOutput +from datetime import datetime +from typing import Dict, Union, List, Optional, Tuple +from urllib.parse import urljoin + + +def create_model_from_response(response: Dict) -> Model: + """Converts response Json to 'Model' object + + Args: + response (Dict): Json from API + + Returns: + Model: Coverted 'Model' object + """ + if "api_key" not in response: + response["api_key"] = config.TEAM_API_KEY + + parameters = {} + if "params" in response: + for param in response["params"]: + if "language" in param["name"]: + parameters[param["name"]] = [w["value"] for w in param["values"]] + + function = Function(response["function"]["id"]) + inputs = [] + ModelClass = Model + if function == Function.TEXT_GENERATION: + ModelClass = LLM + elif function == Function.UTILITIES: + ModelClass = UtilityModel + inputs = [ + UtilityModelInput(name=param["name"], description=param.get("description", ""), type=DataType(param["dataType"])) + for param in response["params"] + ] + + created_at = None + if "createdAt" in response and response["createdAt"]: + created_at = datetime.fromisoformat(response["createdAt"].replace("Z", "+00:00")) + function_id = response["function"]["id"] + function = Function(function_id) + function_io = FunctionInputOutput.get(function_id, None) + input_params = {param["code"]: param for param in function_io["spec"]["params"]} + output_params = {param["code"]: param for param in function_io["spec"]["output"]} + + return ModelClass( + response["id"], + response["name"], + description=response.get("description", ""), + code=response.get("code", ""), + supplier=response["supplier"], + api_key=response["api_key"], + cost=response["pricing"], + function=function, + created_at=created_at, + parameters=parameters, + input_params=input_params, + output_params=output_params, + is_subscribed=True if "subscription" in response else False, + version=response["version"]["id"], + inputs=inputs, + ) + + +def get_assets_from_page( + query, + page_number: int, + page_size: int, + function: Function, + suppliers: Union[Supplier, List[Supplier]], + source_languages: Union[Language, List[Language]], + target_languages: Union[Language, List[Language]], + is_finetunable: bool = None, + ownership: Optional[Tuple[OwnershipType, List[OwnershipType]]] = None, + sort_by: Optional[SortBy] = None, + sort_order: SortOrder = SortOrder.ASCENDING, +) -> List[Model]: + try: + url = urljoin(config.BACKEND_URL, "sdk/models/paginate") + filter_params = {"q": query, 
"pageNumber": page_number, "pageSize": page_size} + if is_finetunable is not None: + filter_params["isFineTunable"] = is_finetunable + if function is not None: + filter_params["functions"] = [function.value] + if suppliers is not None: + if isinstance(suppliers, Supplier) is True: + suppliers = [suppliers] + filter_params["suppliers"] = [supplier.value["id"] for supplier in suppliers] + if ownership is not None: + if isinstance(ownership, OwnershipType) is True: + ownership = [ownership] + filter_params["ownership"] = [ownership_.value for ownership_ in ownership] + + lang_filter_params = [] + if source_languages is not None: + if isinstance(source_languages, Language): + source_languages = [source_languages] + if function == Function.TRANSLATION: + lang_filter_params.append({"code": "sourcelanguage", "value": source_languages[0].value["language"]}) + else: + lang_filter_params.append({"code": "language", "value": source_languages[0].value["language"]}) + if source_languages[0].value["dialect"] != "": + lang_filter_params.append({"code": "dialect", "value": source_languages[0].value["dialect"]}) + if target_languages is not None: + if isinstance(target_languages, Language): + target_languages = [target_languages] + if function == Function.TRANSLATION: + code = "targetlanguage" + lang_filter_params.append({"code": code, "value": target_languages[0].value["language"]}) + if sort_by is not None: + filter_params["sort"] = [{"dir": sort_order.value, "field": sort_by.value}] + if len(lang_filter_params) != 0: + filter_params["ioFilter"] = lang_filter_params + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + + logging.info(f"Start service for POST Models Paginate - {url} - {headers} - {json.dumps(filter_params)}") + r = _request_with_retry("post", url, headers=headers, json=filter_params) + resp = r.json() + + except Exception as e: + error_message = f"Listing Models: Error in getting Models on Page {page_number}: {e}" + logging.error(error_message, exc_info=True) + return [] + if 200 <= r.status_code < 300: + logging.info(f"Listing Models: Status of getting Models on Page {page_number}: {r.status_code}") + all_models = resp["items"] + from aixplain.factories.model_factory.utils import create_model_from_response + + model_list = [create_model_from_response(model_info_json) for model_info_json in all_models] + return model_list, resp["total"] + else: + error_message = f"Listing Models Error: Failed to retrieve models. Status Code: {r.status_code}. Error: {resp}" + logging.error(error_message) + raise Exception(error_message) diff --git a/aixplain/modules/model/utility_model.py b/aixplain/modules/model/utility_model.py new file mode 100644 index 00000000..31bc6058 --- /dev/null +++ b/aixplain/modules/model/utility_model.py @@ -0,0 +1,184 @@ +""" +Copyright 2024 The aiXplain SDK authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+
+Author: Thiago Castro Ferreira, Shreyas Sharma and Lucas Pavanelli
+Date: November 25th 2024
+Description:
+    Utility Model Class
+"""
+import logging
+import os
+import validators
+from aixplain.enums import Function, Supplier, DataType
+from aixplain.modules.model import Model
+from aixplain.utils import config
+from aixplain.utils.file_utils import _request_with_retry
+from dataclasses import dataclass
+from typing import Union, Optional, List, Text, Dict
+from urllib.parse import urljoin
+
+
+@dataclass
+class UtilityModelInput:
+    name: Text
+    description: Text
+    type: DataType = DataType.TEXT
+
+    def __post_init__(self):
+        self.validate_type()
+
+    def validate_type(self):
+        if self.type not in [DataType.TEXT, DataType.BOOLEAN, DataType.NUMBER]:
+            raise ValueError("Utility Model Input type must be TEXT, BOOLEAN or NUMBER")
+
+    def to_dict(self):
+        return {"name": self.name, "description": self.description, "type": self.type.value}
+
+
+class UtilityModel(Model):
+    """Ready-to-use Utility Model.
+
+    Attributes:
+        id (Text): ID of the Model
+        name (Text): Name of the Model
+        description (Text, optional): description of the model. Defaults to "".
+        api_key (Text, optional): API key of the Model. Defaults to None.
+        url (Text, optional): endpoint of the model. Defaults to config.MODELS_RUN_URL.
+        supplier (Union[Dict, Text, Supplier, int], optional): supplier of the asset. Defaults to "aiXplain".
+        version (Text, optional): version of the model. Defaults to "1.0".
+        function (Function, optional): model AI function. Defaults to None.
+        code (Text): code of the model.
+        backend_url (str): URL of the backend.
+        pricing (Dict, optional): model price. Defaults to None.
+        **additional_info: Any additional Model info to be saved
+    """
+
+    def __init__(
+        self,
+        id: Text,
+        name: Text,
+        description: Text,
+        code: Text,
+        inputs: List[UtilityModelInput],
+        output_description: Text,
+        api_key: Optional[Text] = None,
+        supplier: Union[Dict, Text, Supplier, int] = "aiXplain",
+        version: Optional[Text] = None,
+        function: Optional[Function] = None,
+        is_subscribed: bool = False,
+        cost: Optional[Dict] = None,
+        **additional_info,
+    ) -> None:
+        """Utility Model Init
+
+        Args:
+            id (Text): ID of the Model
+            name (Text): Name of the Model
+            description (Text): description of the model.
+            code (Text): code of the model.
+            inputs (List[UtilityModelInput]): inputs of the model.
+            output_description (Text): description of the output
+            api_key (Text, optional): API key of the Model. Defaults to None.
+            supplier (Union[Dict, Text, Supplier, int], optional): supplier of the asset. Defaults to "aiXplain".
+            version (Text, optional): version of the model. Defaults to "1.0".
+            function (Function, optional): model AI function. Defaults to None.
+            is_subscribed (bool, optional): Is the user subscribed. Defaults to False.
+            cost (Dict, optional): model price. Defaults to None.
+ **additional_info: Any additional Model info to be saved + """ + assert function == Function.UTILITIES, "Utility Model only supports 'utilities' function" + super().__init__( + id=id, + name=name, + description=description, + supplier=supplier, + version=version, + cost=cost, + function=function, + is_subscribed=is_subscribed, + api_key=api_key, + **additional_info, + ) + self.url = config.MODELS_RUN_URL + self.backend_url = config.BACKEND_URL + self.code = code + self.inputs = inputs + self.output_description = output_description + self.validate() + + def validate(self): + from aixplain.factories.file_factory import FileFactory + from uuid import uuid4 + + assert self.name and self.name.strip() != "", "Name is required" + assert self.description and self.description.strip() != "", "Description is required" + assert self.code and self.code.strip() != "", "Code is required" + assert self.inputs and len(self.inputs) > 0, "At least one input is required" + assert self.output_description and self.output_description.strip() != "", "Output description is required" + + self.code = FileFactory.to_link(self.code) + # store code in a temporary local path if it is not a valid URL or S3 path + if not validators.url(self.code) and not self.code.startswith("s3:"): + local_path = str(uuid4()) + with open(local_path, "w") as f: + f.write(self.code) + self.code = FileFactory.upload(local_path=local_path, is_temp=True) + os.remove(local_path) + + def to_dict(self): + return { + "name": self.name, + "description": self.description, + "inputs": [input.to_dict() for input in self.inputs], + "code": self.code, + "function": self.function.value, + "outputDescription": self.output_description, + } + + def update(self): + self.validate() + url = urljoin(self.backend_url, f"sdk/utilities/{self.id}") + headers = {"x-api-key": f"{self.api_key}", "Content-Type": "application/json"} + payload = self.to_dict() + try: + logging.info(f"Start service for PUT Utility Model - {url} - {headers} - {payload}") + r = _request_with_retry("put", url, headers=headers, json=payload) + response = r.json() + except Exception as e: + message = f"Utility Model Update Error: {e}" + logging.error(message) + raise Exception(f"{message}") + + if not 200 <= r.status_code < 300: + message = f"Utility Model Update Error: {response}" + logging.error(message) + raise Exception(f"{message}") + + def delete(self): + url = urljoin(self.backend_url, f"sdk/utilities/{self.id}") + headers = {"x-api-key": f"{self.api_key}", "Content-Type": "application/json"} + try: + logging.info(f"Start service for DELETE Utility Model - {url} - {headers}") + r = _request_with_retry("delete", url, headers=headers) + response = r.json() + except Exception: + message = "Utility Model Deletion Error: Make sure the utility model exists and you are the owner." 
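+            # the request failed or returned no JSON; point to the likely causes instead of the raw error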
+ logging.error(message) + raise Exception(f"{message}") + + if r.status_code != 200: + message = f"Utility Model Deletion Error: {response}" + logging.error(message) + raise Exception(f"{message}") diff --git a/tests/functional/model/run_utility_model_test.py b/tests/functional/model/run_utility_model_test.py new file mode 100644 index 00000000..5887c4ca --- /dev/null +++ b/tests/functional/model/run_utility_model_test.py @@ -0,0 +1,33 @@ +from aixplain.factories import ModelFactory +from aixplain.modules.model.utility_model import UtilityModelInput +from aixplain.enums import DataType + + +def test_run_utility_model(): + inputs = [ + UtilityModelInput(name="inputA", description="input A is the only input", type=DataType.TEXT), + ] + + output_description = "An example is 'test'" + + utility_model = ModelFactory.create_utility_model( + name="test_script", + description="This is a test script", + inputs=inputs, + code="def main(inputA):\n\treturn inputA", + output_description=output_description, + ) + + assert utility_model.id is not None + + response = utility_model.run(data={"inputA": "test"}) + assert response.status == "SUCCESS" + assert response.data == "test" + + utility_model.code = "def main(inputA):\n\treturn 5" + utility_model.update() + response = utility_model.run(data={"inputA": "test"}) + assert response.status == "SUCCESS" + assert str(response.data) == "5" + + utility_model.delete() diff --git a/tests/unit/model_test.py b/tests/unit/model_test.py index 452d9ac5..0ddf6345 100644 --- a/tests/unit/model_test.py +++ b/tests/unit/model_test.py @@ -164,6 +164,8 @@ def test_get_model_error_response(): def test_get_assets_from_page_error(): + from aixplain.factories.model_factory.utils import get_assets_from_page + with requests_mock.Mocker() as mock: query = "test-query" page_number = 0 @@ -175,7 +177,7 @@ def test_get_assets_from_page_error(): mock.post(url, headers=headers, json=error_response, status_code=500) with pytest.raises(Exception) as excinfo: - ModelFactory._get_assets_from_page( + get_assets_from_page( query=query, page_number=page_number, page_size=page_size, diff --git a/tests/unit/utility_test.py b/tests/unit/utility_test.py new file mode 100644 index 00000000..c1b7b9e1 --- /dev/null +++ b/tests/unit/utility_test.py @@ -0,0 +1,99 @@ +import pytest +import requests_mock +from aixplain.factories.model_factory import ModelFactory +from urllib.parse import urljoin +from aixplain.utils import config +from aixplain.enums import DataType, Function +from aixplain.modules.model.utility_model import UtilityModel, UtilityModelInput +from unittest.mock import patch + + +def test_utility_model(): + with requests_mock.Mocker() as mock: + with patch("aixplain.factories.file_factory.FileFactory.to_link", return_value="utility_model_test"): + with patch("aixplain.factories.file_factory.FileFactory.upload", return_value="utility_model_test"): + mock.post(urljoin(config.BACKEND_URL, "sdk/utilities"), json={"id": "123"}) + utility_model = ModelFactory.create_utility_model( + name="utility_model_test", + description="utility_model_test", + code="utility_model_test", + inputs=[UtilityModelInput(name="originCode", description="originCode", type=DataType.TEXT)], + output_description="output_description", + ) + assert utility_model.id == "123" + assert utility_model.name == "utility_model_test" + assert utility_model.description == "utility_model_test" + assert utility_model.code == "utility_model_test" + assert utility_model.inputs == [ + UtilityModelInput(name="originCode", 
description="originCode", type=DataType.TEXT) + ] + assert utility_model.output_description == "output_description" + + +def test_utility_model_with_invalid_name(): + with pytest.raises(Exception) as exc_info: + ModelFactory.create_utility_model( + name="", + description="utility_model_test", + code="utility_model_test", + inputs=[], + output_description="output_description", + ) + assert str(exc_info.value) == "Name is required" + + +def test_utility_model_with_invalid_inputs(): + with pytest.raises(Exception) as exc_info: + ModelFactory.create_utility_model( + name="utility_model_test", + description="utility_model_test", + code="utility_model_test", + inputs=[], + output_description="output_description", + ) + assert str(exc_info.value) == "At least one input is required" + + +def test_utility_model_to_dict(): + with patch("aixplain.factories.file_factory.FileFactory.to_link", return_value="utility_model_test"): + with patch("aixplain.factories.file_factory.FileFactory.upload", return_value="utility_model_test"): + utility_model = UtilityModel( + id="123", + name="utility_model_test", + description="utility_model_test", + code="utility_model_test", + output_description="output_description", + inputs=[UtilityModelInput(name="originCode", description="originCode", type=DataType.TEXT)], + function=Function.UTILITIES, + api_key=config.TEAM_API_KEY, + ) + assert utility_model.to_dict() == { + "name": "utility_model_test", + "description": "utility_model_test", + "inputs": [{"name": "originCode", "description": "originCode", "type": "text"}], + "code": "utility_model_test", + "function": "utilities", + "outputDescription": "output_description", + } + + +def test_update_utility_model(): + with requests_mock.Mocker() as mock: + with patch("aixplain.factories.file_factory.FileFactory.to_link", return_value="utility_model_test"): + with patch("aixplain.factories.file_factory.FileFactory.upload", return_value="utility_model_test"): + mock.put(urljoin(config.BACKEND_URL, "sdk/utilities/123"), json={"id": "123"}) + utility_model = UtilityModel( + id="123", + name="utility_model_test", + description="utility_model_test", + code="utility_model_test", + output_description="output_description", + inputs=[UtilityModelInput(name="originCode", description="originCode", type=DataType.TEXT)], + function=Function.UTILITIES, + api_key=config.TEAM_API_KEY, + ) + utility_model.description = "updated_description" + utility_model.update() + + assert utility_model.id == "123" + assert utility_model.description == "updated_description" From 14765a8bf0d2053c726d58cd145d579ba3e2e00c Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Wed, 27 Nov 2024 18:56:48 -0300 Subject: [PATCH 080/105] ModelResponse.__setitem__ (#325) --- aixplain/modules/model/response.py | 12 ++++++++++++ tests/unit/model_test.py | 11 +++++++++++ 2 files changed, 23 insertions(+) diff --git a/aixplain/modules/model/response.py b/aixplain/modules/model/response.py index 99e32074..1576c1f4 100644 --- a/aixplain/modules/model/response.py +++ b/aixplain/modules/model/response.py @@ -46,6 +46,18 @@ def get(self, key: Text, default: Optional[Any] = None) -> Any: except KeyError: return default + def __setitem__(self, key: Text, value: Any) -> None: + if key in self.__dict__: + self.__dict__[key] = value + elif self.additional_fields and key in self.additional_fields: + self.additional_fields[key] = value + elif key == "usedCredits": + self.used_credits = value + elif key == "runTime": + 
self.run_time = value + else: + raise KeyError(f"Key '{key}' not found in ModelResponse.") + def __repr__(self) -> str: fields = [] if self.status: diff --git a/tests/unit/model_test.py b/tests/unit/model_test.py index 0ddf6345..68da9ef6 100644 --- a/tests/unit/model_test.py +++ b/tests/unit/model_test.py @@ -488,3 +488,14 @@ def test_check_finetune_status_no_logs(): assert status.epoch is None assert status.training_loss is None assert status.validation_loss is None + + +def test_model_response(): + response = ModelResponse(status="SUCCESS", data="test", used_credits=0, run_time=0, usage=None) + assert response["data"] == "test" + response["data"] = "thiago" + assert response["data"] == "thiago" + value = response.get("data") + assert value == "thiago" + value = response.get("not_found", "default_value") + assert value == "default_value" From 7732dadd2be8f6c72cc59b3a3122f68c406358eb Mon Sep 17 00:00:00 2001 From: Zaina Abu Shaban Date: Wed, 4 Dec 2024 19:29:35 +0300 Subject: [PATCH 081/105] ENG-1129: aixplain sdk caching functions (#324) * fixed corrupted file * added languages and licenses * made changes according to comments * changes to constants and re-added json checker * changes to constants and re-added json checker * added process after save json * Fixes in the caching function * Move Cache folder --------- Co-authored-by: Thiago Castro Ferreira --- aixplain/enums/__init__.py | 2 +- aixplain/enums/function.py | 26 +++++++++++++--------- aixplain/enums/language.py | 37 +++++++++++++++++-------------- aixplain/enums/license.py | 41 ++++++++++++++++++++--------------- aixplain/utils/cache_utils.py | 27 +++++++++++++++++++++++ aixplain/utils/config.py | 4 +++- 6 files changed, 92 insertions(+), 45 deletions(-) create mode 100644 aixplain/utils/cache_utils.py diff --git a/aixplain/enums/__init__.py b/aixplain/enums/__init__.py index 947d59a9..ef497ddd 100644 --- a/aixplain/enums/__init__.py +++ b/aixplain/enums/__init__.py @@ -13,4 +13,4 @@ from .supplier import Supplier from .sort_by import SortBy from .sort_order import SortOrder -from .response_status import ResponseStatus \ No newline at end of file +from .response_status import ResponseStatus diff --git a/aixplain/enums/function.py b/aixplain/enums/function.py index 67b5eba0..12434707 100644 --- a/aixplain/enums/function.py +++ b/aixplain/enums/function.py @@ -21,27 +21,32 @@ Function Enum """ -import logging - from aixplain.utils import config from aixplain.utils.request_utils import _request_with_retry from enum import Enum from urllib.parse import urljoin +from aixplain.utils.cache_utils import save_to_cache, load_from_cache, CACHE_FOLDER + +CACHE_FILE = f"{CACHE_FOLDER}/functions.json" def load_functions(): api_key = config.TEAM_API_KEY backend_url = config.BACKEND_URL - url = urljoin(backend_url, "sdk/functions") + resp = load_from_cache(CACHE_FILE) + if resp is None: + url = urljoin(backend_url, "sdk/functions") + + headers = {"x-api-key": api_key, "Content-Type": "application/json"} + r = _request_with_retry("get", url, headers=headers) + if not 200 <= r.status_code < 300: + raise Exception( + f'Functions could not be loaded, probably due to the set API key (e.g. "{api_key}") is not valid. 
For help, please refer to the documentation (https://github.com/aixplain/aixplain#api-key-setup)' + ) + resp = r.json() + save_to_cache(CACHE_FILE, resp) - headers = {"x-api-key": api_key, "Content-Type": "application/json"} - r = _request_with_retry("get", url, headers=headers) - if not 200 <= r.status_code < 300: - raise Exception( - f'Functions could not be loaded, probably due to the set API key (e.g. "{api_key}") is not valid. For help, please refer to the documentation (https://github.com/aixplain/aixplain#api-key-setup)' - ) - resp = r.json() functions = Enum("Function", {w["id"].upper().replace("-", "_"): w["id"] for w in resp["items"]}, type=str) functions_input_output = { function["id"]: { @@ -57,4 +62,5 @@ def load_functions(): } return functions, functions_input_output + Function, FunctionInputOutput = load_functions() diff --git a/aixplain/enums/language.py b/aixplain/enums/language.py index 674940ab..db66b2a1 100644 --- a/aixplain/enums/language.py +++ b/aixplain/enums/language.py @@ -21,27 +21,32 @@ Language Enum """ -import logging - -from aixplain.utils import config -from aixplain.utils.request_utils import _request_with_retry from enum import Enum from urllib.parse import urljoin +from aixplain.utils import config +from aixplain.utils.request_utils import _request_with_retry +from aixplain.utils.cache_utils import save_to_cache, load_from_cache, CACHE_FOLDER + +CACHE_FILE = f"{CACHE_FOLDER}/languages.json" def load_languages(): - api_key = config.TEAM_API_KEY - backend_url = config.BACKEND_URL - - url = urljoin(backend_url, "sdk/languages") - - headers = {"x-api-key": api_key, "Content-Type": "application/json"} - r = _request_with_retry("get", url, headers=headers) - if not 200 <= r.status_code < 300: - raise Exception( - f'Languages could not be loaded, probably due to the set API key (e.g. "{api_key}") is not valid. For help, please refer to the documentation (https://github.com/aixplain/aixplain#api-key-setup)' - ) - resp = r.json() + resp = load_from_cache(CACHE_FILE) + if resp is None: + api_key = config.TEAM_API_KEY + backend_url = config.BACKEND_URL + + url = urljoin(backend_url, "sdk/languages") + + headers = {"x-api-key": api_key, "Content-Type": "application/json"} + r = _request_with_retry("get", url, headers=headers) + if not 200 <= r.status_code < 300: + raise Exception( + f'Languages could not be loaded, probably due to the set API key (e.g. "{api_key}") is not valid. 
For help, please refer to the documentation (https://github.com/aixplain/aixplain#api-key-setup)' + ) + resp = r.json() + save_to_cache(CACHE_FILE, resp) + languages = {} for w in resp: language = w["value"] diff --git a/aixplain/enums/license.py b/aixplain/enums/license.py index 14527829..a860a539 100644 --- a/aixplain/enums/license.py +++ b/aixplain/enums/license.py @@ -22,29 +22,36 @@ """ import logging - -from aixplain.utils import config -from aixplain.utils.request_utils import _request_with_retry from enum import Enum from urllib.parse import urljoin +from aixplain.utils import config +from aixplain.utils.request_utils import _request_with_retry +from aixplain.utils.cache_utils import save_to_cache, load_from_cache, CACHE_FOLDER + +CACHE_FILE = f"{CACHE_FOLDER}/licenses.json" def load_licenses(): + resp = load_from_cache(CACHE_FILE) + try: - api_key = config.TEAM_API_KEY - backend_url = config.BACKEND_URL - - url = urljoin(backend_url, "sdk/licenses") - - headers = {"x-api-key": api_key, "Content-Type": "application/json"} - r = _request_with_retry("get", url, headers=headers) - if not 200 <= r.status_code < 300: - raise Exception( - f'Licenses could not be loaded, probably due to the set API key (e.g. "{api_key}") is not valid. For help, please refer to the documentation (https://github.com/aixplain/aixplain#api-key-setup)' - ) - resp = r.json() - return Enum("License", {"_".join(w["name"].split()): w["id"] for w in resp}, type=str) - except Exception as e: + if resp is None: + api_key = config.TEAM_API_KEY + backend_url = config.BACKEND_URL + + url = urljoin(backend_url, "sdk/licenses") + + headers = {"x-api-key": api_key, "Content-Type": "application/json"} + r = _request_with_retry("get", url, headers=headers) + if not 200 <= r.status_code < 300: + raise Exception( + f'Licenses could not be loaded, probably due to the set API key (e.g. "{api_key}") is not valid. For help, please refer to the documentation (https://github.com/aixplain/aixplain#api-key-setup)' + ) + resp = r.json() + save_to_cache(CACHE_FILE, resp) + licenses = {"_".join(w["name"].split()): w["id"] for w in resp} + return Enum("License", licenses, type=str) + except Exception: logging.exception("License Loading Error") raise Exception("License Loading Error") diff --git a/aixplain/utils/cache_utils.py b/aixplain/utils/cache_utils.py new file mode 100644 index 00000000..5a0eb6ae --- /dev/null +++ b/aixplain/utils/cache_utils.py @@ -0,0 +1,27 @@ +import os +import json +import time +import logging + +CACHE_DURATION = 24 * 60 * 60 +CACHE_FOLDER = ".aixplain_cache" + + +def save_to_cache(cache_file, data): + try: + os.makedirs(os.path.dirname(cache_file), exist_ok=True) + with open(cache_file, "w") as f: + json.dump({"timestamp": time.time(), "data": data}, f) + except Exception as e: + logging.error(f"Failed to save cache to {cache_file}: {e}") + + +def load_from_cache(cache_file): + if os.path.exists(cache_file) is True: + with open(cache_file, "r") as f: + cache_data = json.load(f) + if time.time() - cache_data["timestamp"] < CACHE_DURATION: + return cache_data["data"] + else: + return None + return None diff --git a/aixplain/utils/config.py b/aixplain/utils/config.py index b47bc4f7..aa0d46e6 100644 --- a/aixplain/utils/config.py +++ b/aixplain/utils/config.py @@ -26,7 +26,9 @@ if AIXPLAIN_API_KEY and TEAM_API_KEY: if AIXPLAIN_API_KEY != TEAM_API_KEY: - raise Exception("Conflicting API keys: 'AIXPLAIN_API_KEY' and 'TEAM_API_KEY' are both provided but do not match. 
Please provide only one API key.") + raise Exception( + "Conflicting API keys: 'AIXPLAIN_API_KEY' and 'TEAM_API_KEY' are both provided but do not match. Please provide only one API key." + ) if AIXPLAIN_API_KEY and not TEAM_API_KEY: From 3ccbf62c62a9c0b97bd361308d4056ebca417d26 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Wed, 4 Dec 2024 19:00:52 -0300 Subject: [PATCH 082/105] ENG-1110: onboard utility model (#329) * Onboard Utilities * Tests for utility models * Parse code function * Adapting utility model onboarding to unit tests * Unit test for utility removal --- aixplain/factories/model_factory/__init__.py | 19 +- aixplain/modules/model/utility_model.py | 59 +++--- aixplain/modules/model/utils.py | 61 +++++- .../model/run_utility_model_test.py | 8 +- tests/unit/utility_test.py | 190 +++++++++++++----- 5 files changed, 238 insertions(+), 99 deletions(-) diff --git a/aixplain/factories/model_factory/__init__.py b/aixplain/factories/model_factory/__init__.py index 8ec3183b..ca035825 100644 --- a/aixplain/factories/model_factory/__init__.py +++ b/aixplain/factories/model_factory/__init__.py @@ -20,7 +20,7 @@ Description: Model Factory Class """ -from typing import Dict, List, Optional, Text, Tuple, Union +from typing import Callable, Dict, List, Optional, Text, Tuple, Union import json import logging from aixplain.modules.model import Model @@ -42,16 +42,21 @@ class ModelFactory: @classmethod def create_utility_model( - cls, name: Text, description: Text, inputs: List[UtilityModelInput], code: Text, output_description: Text + cls, + name: Text, + code: Union[Text, Callable], + inputs: List[UtilityModelInput] = [], + description: Optional[Text] = None, + output_examples: Text = "", ) -> UtilityModel: """Create a utility model Args: name (Text): name of the model - description (Text): description of the model - inputs (List[UtilityModelInput]): inputs of the model - code (Text): code of the model - output_description (Text): description of the output + code (Union[Text, Callable]): code of the model + description (Text, optional): description of the model + inputs (List[UtilityModelInput], optional): inputs of the model + output_examples (Text, optional): output examples Returns: UtilityModel: created utility model @@ -64,7 +69,7 @@ def create_utility_model( code=code, function=Function.UTILITIES, api_key=config.TEAM_API_KEY, - output_description=output_description, + output_examples=output_examples, ) payload = utility_model.to_dict() url = urljoin(cls.backend_url, "sdk/utilities") diff --git a/aixplain/modules/model/utility_model.py b/aixplain/modules/model/utility_model.py index 31bc6058..1bc40f67 100644 --- a/aixplain/modules/model/utility_model.py +++ b/aixplain/modules/model/utility_model.py @@ -19,14 +19,13 @@ Utility Model Class """ import logging -import os -import validators from aixplain.enums import Function, Supplier, DataType from aixplain.modules.model import Model from aixplain.utils import config from aixplain.utils.file_utils import _request_with_retry +from aixplain.modules.model.utils import parse_code from dataclasses import dataclass -from typing import Union, Optional, List, Text, Dict +from typing import Callable, Union, Optional, List, Text, Dict from urllib.parse import urljoin @@ -53,15 +52,16 @@ class UtilityModel(Model): Attributes: id (Text): ID of the Model name (Text): Name of the Model - description (Text, optional): description of the model. Defaults to "". 
+ code (Union[Text, Callable]): code of the model. + description (Text): description of the model. Defaults to "". + inputs (List[UtilityModelInput]): inputs of the model. Defaults to []. + output_examples (Text): output examples. Defaults to "". api_key (Text, optional): API key of the Model. Defaults to None. - url (Text, optional): endpoint of the model. Defaults to config.MODELS_RUN_URL. supplier (Union[Dict, Text, Supplier, int], optional): supplier of the asset. Defaults to "aiXplain". version (Text, optional): version of the model. Defaults to "1.0". - function (Text, optional): model AI function. Defaults to None. - url (str): URL to run the model. - backend_url (str): URL of the backend. - pricing (Dict, optional): model price. Defaults to None. + function (Function, optional): model AI function. Defaults to None. + is_subscribed (bool, optional): Is the user subscribed. Defaults to False. + cost (Dict, optional): model price. Defaults to None. **additional_info: Any additional Model info to be saved """ @@ -69,10 +69,10 @@ def __init__( self, id: Text, name: Text, - description: Text, - code: Text, - inputs: List[UtilityModelInput], - output_description: Text, + code: Union[Text, Callable], + description: Optional[Text] = None, + inputs: List[UtilityModelInput] = [], + output_examples: Text = "", api_key: Optional[Text] = None, supplier: Union[Dict, Text, Supplier, int] = "aiXplain", version: Optional[Text] = None, @@ -86,10 +86,10 @@ def __init__( Args: id (Text): ID of the Model name (Text): Name of the Model - description (Text): description of the model. - code (Text): code of the model. - inputs (List[UtilityModelInput]): inputs of the model. - output_description (Text): description of the output + code (Union[Text, Callable]): code of the model. + description (Text): description of the model. Defaults to "". + inputs (List[UtilityModelInput]): inputs of the model. Defaults to []. + output_examples (Text): output examples. Defaults to "". api_key (Text, optional): API key of the Model. Defaults to None. supplier (Union[Dict, Text, Supplier, int], optional): supplier of the asset. Defaults to "aiXplain". version (Text, optional): version of the model. Defaults to "1.0". 
@@ -115,27 +115,20 @@ def __init__( self.backend_url = config.BACKEND_URL self.code = code self.inputs = inputs - self.output_description = output_description + self.output_examples = output_examples self.validate() def validate(self): - from aixplain.factories.file_factory import FileFactory - from uuid import uuid4 - + self.code, inputs, description = parse_code(self.code) + assert description is not None or self.description is not None, "Utility Model Error: Model description is required" + if self.description is None: + self.description = description + if len(self.inputs) == 0: + self.inputs = inputs assert self.name and self.name.strip() != "", "Name is required" assert self.description and self.description.strip() != "", "Description is required" assert self.code and self.code.strip() != "", "Code is required" - assert self.inputs and len(self.inputs) > 0, "At least one input is required" - assert self.output_description and self.output_description.strip() != "", "Output description is required" - - self.code = FileFactory.to_link(self.code) - # store code in a temporary local path if it is not a valid URL or S3 path - if not validators.url(self.code) and not self.code.startswith("s3:"): - local_path = str(uuid4()) - with open(local_path, "w") as f: - f.write(self.code) - self.code = FileFactory.upload(local_path=local_path, is_temp=True) - os.remove(local_path) + assert self.output_examples and self.output_examples.strip() != "", "Output description is required" def to_dict(self): return { @@ -144,7 +137,7 @@ def to_dict(self): "inputs": [input.to_dict() for input in self.inputs], "code": self.code, "function": self.function.value, - "outputDescription": self.output_description, + "outputDescription": self.output_examples, } def update(self): diff --git a/aixplain/modules/model/utils.py b/aixplain/modules/model/utils.py index 13cc1f7c..68131c96 100644 --- a/aixplain/modules/model/utils.py +++ b/aixplain/modules/model/utils.py @@ -3,7 +3,7 @@ import json import logging from aixplain.utils.file_utils import _request_with_retry -from typing import Dict, Text, Union, Optional +from typing import Callable, Dict, List, Text, Tuple, Union, Optional def build_payload(data: Union[Text, Dict], parameters: Optional[Dict] = None): @@ -77,3 +77,62 @@ def call_run_endpoint(url: Text, api_key: Text, payload: Dict) -> Dict: response = {"status": "FAILED", "error_message": error, "completed": True} logging.error(f"Error in request: {r.status_code}: {error}") return response + + +def parse_code(code: Union[Text, Callable]) -> Tuple[Text, List, Text]: + import inspect + import os + import re + import requests + import validators + from aixplain.enums import DataType + from aixplain.modules.model.utility_model import UtilityModelInput + from aixplain.factories.file_factory import FileFactory + from uuid import uuid4 + + inputs, description = [], "" + + if isinstance(code, Callable): + str_code = inspect.getsource(code) + description = code.__doc__.strip() if code.__doc__ else "" + elif os.path.exists(code): + with open(code, "r") as f: + str_code = f.read() + elif validators.url(code): + str_code = requests.get(code).text + else: + str_code = code + + # assert str_code has a main function + if "def main(" not in str_code: + raise Exception("Utility Model Error: Code must have a main function") + + f = re.findall(r"main\((.*?(?:\s*=\s*[^,)]+)?(?:\s*,\s*.*?(?:\s*=\s*[^,)]+)?)*)\)", str_code) + parameters = f[0].split(",") if len(f) > 0 else [] + + for input in parameters: + assert ( + 
len(input.split(":")) > 1 + ), "Utility Model Error: Input type is required. For instance def main(a: int, b: int) -> int:" + input_name, input_type = input.split(":") + input_name = input_name.strip() + input_type = input_type.split("=")[0].strip() + + if input_type in ["int", "float"]: + input_type = "number" + inputs.append(UtilityModelInput(name=input_name, type=DataType.NUMBER, description="")) + elif input_type == "bool": + input_type = "boolean" + inputs.append(UtilityModelInput(name=input_name, type=DataType.BOOLEAN, description="")) + elif input_type == "str": + input_type = "text" + inputs.append(UtilityModelInput(name=input_name, type=DataType.TEXT, description="")) + else: + raise Exception(f"Utility Model Error: Unsupported input type: {input_type}") + + local_path = str(uuid4()) + with open(local_path, "w") as f: + f.write(str_code) + code = FileFactory.upload(local_path=local_path, is_temp=True) + os.remove(local_path) + return code, inputs, description diff --git a/tests/functional/model/run_utility_model_test.py b/tests/functional/model/run_utility_model_test.py index 5887c4ca..ce0b7579 100644 --- a/tests/functional/model/run_utility_model_test.py +++ b/tests/functional/model/run_utility_model_test.py @@ -14,17 +14,19 @@ def test_run_utility_model(): name="test_script", description="This is a test script", inputs=inputs, - code="def main(inputA):\n\treturn inputA", - output_description=output_description, + code="def main(inputA: str):\n\treturn inputA", + output_examples=output_description, ) assert utility_model.id is not None + assert utility_model.inputs == inputs + assert utility_model.output_examples == output_description response = utility_model.run(data={"inputA": "test"}) assert response.status == "SUCCESS" assert response.data == "test" - utility_model.code = "def main(inputA):\n\treturn 5" + utility_model.code = "def main(inputA: str):\n\treturn 5" utility_model.update() response = utility_model.run(data={"inputA": "test"}) assert response.status == "SUCCESS" diff --git a/tests/unit/utility_test.py b/tests/unit/utility_test.py index c1b7b9e1..89803cac 100644 --- a/tests/unit/utility_test.py +++ b/tests/unit/utility_test.py @@ -5,6 +5,7 @@ from aixplain.utils import config from aixplain.enums import DataType, Function from aixplain.modules.model.utility_model import UtilityModel, UtilityModelInput +from aixplain.modules.model.utils import parse_code from unittest.mock import patch @@ -16,84 +17,163 @@ def test_utility_model(): utility_model = ModelFactory.create_utility_model( name="utility_model_test", description="utility_model_test", - code="utility_model_test", - inputs=[UtilityModelInput(name="originCode", description="originCode", type=DataType.TEXT)], - output_description="output_description", + code="def main(originCode: str)", + output_examples="output_description", ) assert utility_model.id == "123" assert utility_model.name == "utility_model_test" assert utility_model.description == "utility_model_test" assert utility_model.code == "utility_model_test" - assert utility_model.inputs == [ - UtilityModelInput(name="originCode", description="originCode", type=DataType.TEXT) - ] - assert utility_model.output_description == "output_description" + assert utility_model.inputs == [UtilityModelInput(name="originCode", description="", type=DataType.TEXT)] + assert utility_model.output_examples == "output_description" def test_utility_model_with_invalid_name(): - with pytest.raises(Exception) as exc_info: - ModelFactory.create_utility_model( - name="", - 
description="utility_model_test", - code="utility_model_test", - inputs=[], - output_description="output_description", - ) - assert str(exc_info.value) == "Name is required" - - -def test_utility_model_with_invalid_inputs(): - with pytest.raises(Exception) as exc_info: - ModelFactory.create_utility_model( - name="utility_model_test", - description="utility_model_test", - code="utility_model_test", - inputs=[], - output_description="output_description", - ) - assert str(exc_info.value) == "At least one input is required" + with patch("aixplain.factories.file_factory.FileFactory.to_link", return_value="utility_model_test"): + with patch("aixplain.factories.file_factory.FileFactory.upload", return_value="utility_model_test"): + with patch( + "aixplain.modules.model.utils.parse_code", + return_value=( + "def main(originCode: str)", + [UtilityModelInput(name="originCode", description="originCode", type=DataType.TEXT)], + "utility_model_test", + ), + ): + with pytest.raises(Exception) as exc_info: + ModelFactory.create_utility_model( + name="", + description="utility_model_test", + code="def main(originCode: str)", + inputs=[], + output_examples="output_description", + ) + assert str(exc_info.value) == "Name is required" def test_utility_model_to_dict(): with patch("aixplain.factories.file_factory.FileFactory.to_link", return_value="utility_model_test"): with patch("aixplain.factories.file_factory.FileFactory.upload", return_value="utility_model_test"): - utility_model = UtilityModel( - id="123", - name="utility_model_test", - description="utility_model_test", - code="utility_model_test", - output_description="output_description", - inputs=[UtilityModelInput(name="originCode", description="originCode", type=DataType.TEXT)], - function=Function.UTILITIES, - api_key=config.TEAM_API_KEY, - ) - assert utility_model.to_dict() == { - "name": "utility_model_test", - "description": "utility_model_test", - "inputs": [{"name": "originCode", "description": "originCode", "type": "text"}], - "code": "utility_model_test", - "function": "utilities", - "outputDescription": "output_description", - } + with patch( + "aixplain.modules.model.utils.parse_code", + return_value=( + "def main(originCode: str)", + [UtilityModelInput(name="originCode", description="originCode", type=DataType.TEXT)], + "utility_model_test", + ), + ): + utility_model = UtilityModel( + id="123", + name="utility_model_test", + description="utility_model_test", + code="def main(originCode: str)", + output_examples="output_description", + inputs=[UtilityModelInput(name="originCode", description="originCode", type=DataType.TEXT)], + function=Function.UTILITIES, + api_key=config.TEAM_API_KEY, + ) + assert utility_model.to_dict() == { + "name": "utility_model_test", + "description": "utility_model_test", + "inputs": [{"name": "originCode", "description": "originCode", "type": "text"}], + "code": "utility_model_test", + "function": "utilities", + "outputDescription": "output_description", + } def test_update_utility_model(): with requests_mock.Mocker() as mock: - with patch("aixplain.factories.file_factory.FileFactory.to_link", return_value="utility_model_test"): - with patch("aixplain.factories.file_factory.FileFactory.upload", return_value="utility_model_test"): - mock.put(urljoin(config.BACKEND_URL, "sdk/utilities/123"), json={"id": "123"}) + with patch("aixplain.factories.file_factory.FileFactory.to_link", return_value="def main(originCode: str)"): + with patch("aixplain.factories.file_factory.FileFactory.upload", return_value="def 
main(originCode: str)"): + with patch( + "aixplain.modules.model.utils.parse_code", + return_value=( + "def main(originCode: str)", + [UtilityModelInput(name="originCode", description="originCode", type=DataType.TEXT)], + "utility_model_test", + ), + ): + mock.put(urljoin(config.BACKEND_URL, "sdk/utilities/123"), json={"id": "123"}) + utility_model = UtilityModel( + id="123", + name="utility_model_test", + description="utility_model_test", + code="def main(originCode: str)", + output_examples="output_description", + inputs=[UtilityModelInput(name="originCode", description="originCode", type=DataType.TEXT)], + function=Function.UTILITIES, + api_key=config.TEAM_API_KEY, + ) + utility_model.description = "updated_description" + utility_model.update() + + assert utility_model.id == "123" + assert utility_model.description == "updated_description" + + +def test_delete_utility_model(): + with requests_mock.Mocker() as mock: + with patch("aixplain.factories.file_factory.FileFactory.to_link", return_value="def main(originCode: str)"): + with patch("aixplain.factories.file_factory.FileFactory.upload", return_value="def main(originCode: str)"): + mock.delete(urljoin(config.BACKEND_URL, "sdk/utilities/123"), status_code=200, json={"id": "123"}) utility_model = UtilityModel( id="123", name="utility_model_test", description="utility_model_test", - code="utility_model_test", - output_description="output_description", + code="def main(originCode: str)", + output_examples="output_description", inputs=[UtilityModelInput(name="originCode", description="originCode", type=DataType.TEXT)], function=Function.UTILITIES, api_key=config.TEAM_API_KEY, ) - utility_model.description = "updated_description" - utility_model.update() + utility_model.delete() + assert mock.called - assert utility_model.id == "123" - assert utility_model.description == "updated_description" + +def test_parse_code(): + # Code is a string + with patch("aixplain.factories.file_factory.FileFactory.to_link", return_value="code_link"): + with patch("aixplain.factories.file_factory.FileFactory.upload", return_value="code_link"): + code = "def main(originCode: str) -> str:\n return originCode" + code_link, inputs, description = parse_code(code) + assert inputs == [UtilityModelInput(name="originCode", description="", type=DataType.TEXT)] + assert description == "" + assert code_link == "code_link" + + # Code is a function + def main(a: int, b: int): + """ + This function adds two numbers + """ + return a + b + + with patch("aixplain.factories.file_factory.FileFactory.to_link", return_value="code_link"): + with patch("aixplain.factories.file_factory.FileFactory.upload", return_value="code_link"): + code = main + code_link, inputs, description = parse_code(code) + assert inputs == [ + UtilityModelInput(name="a", description="", type=DataType.NUMBER), + UtilityModelInput(name="b", description="", type=DataType.NUMBER), + ] + assert description == "This function adds two numbers" + assert code_link == "code_link" + + # Code must have a main function + code = "def wrong_function_name(originCode: str) -> str:\n return originCode" + with pytest.raises(Exception) as exc_info: + parse_code(code) + assert str(exc_info.value) == "Utility Model Error: Code must have a main function" + + # Input type is required + def main(originCode): + return originCode + + with pytest.raises(Exception) as exc_info: + parse_code(main) + assert str(exc_info.value) == "Utility Model Error: Input type is required. 
For instance def main(a: int, b: int) -> int:"
+
+    # Unsupported input type
+    code = "def main(originCode: list) -> str:\n    return originCode"
+    with pytest.raises(Exception) as exc_info:
+        parse_code(code)
+    assert str(exc_info.value) == "Utility Model Error: Unsupported input type: list"

From b7ece3fa4671ad515f0dcf1ffb3be1176463168a Mon Sep 17 00:00:00 2001
From: Zaina Abu Shaban
Date: Thu, 5 Dec 2024 22:08:42 +0300
Subject: [PATCH 083/105] BUG-262: Payload gets values from both parameters and data (#327)

* Payload gets values from both parameters and data

* fixed data string issue

* Fixed circular reference

* cleaned up the code

* Remove unused logging

---------

Co-authored-by: Thiago Castro Ferreira
---
 aixplain/modules/model/llm_model.py | 50 +++++++++++++++--------------
 1 file changed, 26 insertions(+), 24 deletions(-)

diff --git a/aixplain/modules/model/llm_model.py b/aixplain/modules/model/llm_model.py
index 600fd32e..1f64f246 100644
--- a/aixplain/modules/model/llm_model.py
+++ b/aixplain/modules/model/llm_model.py
@@ -126,19 +126,21 @@ def run(
             Dict: parsed output from model
         """
         start = time.time()
-        if parameters is None:
-            parameters = {}
-        parameters.update(
-            {
-                "context": parameters.get("context", context),
-                "prompt": parameters.get("prompt", prompt),
-                "history": parameters.get("history", history),
-                "temperature": parameters.get("temperature", temperature),
-                "max_tokens": parameters.get("max_tokens", max_tokens),
-                "top_p": parameters.get("top_p", top_p),
-            }
-        )
+        parameters = parameters or {}
+
+        if isinstance(data, dict):
+            parameters = {**data, **parameters}
+            data = data.get("data", "")
+
+        parameters.setdefault("context", context)
+        parameters.setdefault("prompt", prompt)
+        parameters.setdefault("history", history)
+        parameters.setdefault("temperature", temperature)
+        parameters.setdefault("max_tokens", max_tokens)
+        parameters.setdefault("top_p", top_p)
+
         payload = build_payload(data=data, parameters=parameters)
+        logging.info(payload)
         url = f"{self.url}/{self.id}".replace("/api/v1/execute", "/api/v2/execute")
         logging.debug(f"Model Run Sync: Start service for {name} - {url}")
         response = call_run_endpoint(payload=payload, url=url, api_key=self.api_key)
@@ -195,18 +197,18 @@ def run_async(
         """
         url = f"{self.url}/{self.id}"
         logging.debug(f"Model Run Async: Start service for {name} - {url}")
-        if parameters is None:
-            parameters = {}
-        parameters.update(
-            {
-                "context": parameters.get("context", context),
-                "prompt": parameters.get("prompt", prompt),
-                "history": parameters.get("history", history),
-                "temperature": parameters.get("temperature", temperature),
-                "max_tokens": parameters.get("max_tokens", max_tokens),
-                "top_p": parameters.get("top_p", top_p),
-            }
-        )
+        parameters = parameters or {}
+
+        if isinstance(data, dict):
+            parameters = {**data, **parameters}
+            data = data.get("data", "")
+
+        parameters.setdefault("context", context)
+        parameters.setdefault("prompt", prompt)
+        parameters.setdefault("history", history)
+        parameters.setdefault("temperature", temperature)
+        parameters.setdefault("max_tokens", max_tokens)
+        parameters.setdefault("top_p", top_p)
         payload = build_payload(data=data, parameters=parameters)
         response = call_run_endpoint(payload=payload, url=url, api_key=self.api_key)
         return ModelResponse(
From 14c26d47b5353cac925d896f1c29845f88700f5a Mon Sep 17 00:00:00 2001
From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com>
Date: Mon, 9 Dec 2024 10:49:20 -0300
Subject: [PATCH 084/105] Fixing validation of utility models (#331)

---
 aixplain/factories/model_factory/__init__.py | 1 +
 aixplain/modules/model/utility_model.py      | 2 --
 tests/unit/utility_test.py                   | 2 +-
 3 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/aixplain/factories/model_factory/__init__.py b/aixplain/factories/model_factory/__init__.py
index ca035825..4a592ab9 100644
--- a/aixplain/factories/model_factory/__init__.py
+++ b/aixplain/factories/model_factory/__init__.py
@@ -71,6 +71,7 @@ def create_utility_model(
         api_key=config.TEAM_API_KEY,
         output_examples=output_examples,
     )
+    utility_model.validate()
     payload = utility_model.to_dict()
     url = urljoin(cls.backend_url, "sdk/utilities")
     headers = {"x-api-key": f"{config.TEAM_API_KEY}", "Content-Type": "application/json"}
diff --git a/aixplain/modules/model/utility_model.py b/aixplain/modules/model/utility_model.py
index 1bc40f67..a9fe514e 100644
--- a/aixplain/modules/model/utility_model.py
+++ b/aixplain/modules/model/utility_model.py
@@ -116,7 +116,6 @@ def __init__(
         self.code = code
         self.inputs = inputs
         self.output_examples = output_examples
-        self.validate()

     def validate(self):
         self.code, inputs, description = parse_code(self.code)
@@ -128,7 +127,6 @@ def validate(self):
         assert self.name and self.name.strip() != "", "Name is required"
         assert self.description and self.description.strip() != "", "Description is required"
         assert self.code and self.code.strip() != "", "Code is required"
-        assert self.output_examples and self.output_examples.strip() != "", "Output description is required"

     def to_dict(self):
         return {
diff --git a/tests/unit/utility_test.py b/tests/unit/utility_test.py
index 89803cac..265e31b3 100644
--- a/tests/unit/utility_test.py
+++ b/tests/unit/utility_test.py
@@ -65,7 +65,7 @@ def test_utility_model_to_dict():
                     id="123",
                     name="utility_model_test",
                     description="utility_model_test",
-                    code="def main(originCode: str)",
+                    code="utility_model_test",
                     output_examples="output_description",
                     inputs=[UtilityModelInput(name="originCode", description="originCode", type=DataType.TEXT)],
                     function=Function.UTILITIES,
From 717b2b1c9aeaeb7d64ec4cc7d41cca8dc1aada27 Mon Sep 17 00:00:00 2001
From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com>
Date: Tue, 10 Dec 2024 18:55:34 -0300
Subject: [PATCH 085/105] ENG-1110: Onboarding a Utility Model (#334)

* Onboard Utilities

* Tests for utility models

* Parse code function

* Adapting utility model onboarding to unit tests

* Unit test for utility removal

* Auto-describe utility model inputs
---
 aixplain/modules/model/utils.py | 12 +++++++++---
 tests/unit/utility_test.py      | 12 ++++++++----
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/aixplain/modules/model/utils.py b/aixplain/modules/model/utils.py
index 68131c96..f3691928 100644
--- a/aixplain/modules/model/utils.py
+++ b/aixplain/modules/model/utils.py
@@ -120,13 +120,19 @@ def parse_code(code: Union[Text, Callable]) -> Tuple[Text, List, Text]:

         if input_type in ["int", "float"]:
             input_type = "number"
-            inputs.append(UtilityModelInput(name=input_name, type=DataType.NUMBER, description=""))
+            inputs.append(
+                UtilityModelInput(name=input_name, type=DataType.NUMBER, description=f"The {input_name} input is a number")
+            )
         elif input_type == "bool":
             input_type = "boolean"
-            inputs.append(UtilityModelInput(name=input_name, type=DataType.BOOLEAN, description=""))
+            inputs.append(
+                UtilityModelInput(name=input_name, type=DataType.BOOLEAN, description=f"The {input_name} input is a boolean")
+            )
         elif input_type == "str":
             input_type = "text"
-
inputs.append(UtilityModelInput(name=input_name, type=DataType.TEXT, description="")) + inputs.append( + UtilityModelInput(name=input_name, type=DataType.TEXT, description=f"The {input_name} input is a text") + ) else: raise Exception(f"Utility Model Error: Unsupported input type: {input_type}") diff --git a/tests/unit/utility_test.py b/tests/unit/utility_test.py index 265e31b3..2f3ba6a1 100644 --- a/tests/unit/utility_test.py +++ b/tests/unit/utility_test.py @@ -24,7 +24,9 @@ def test_utility_model(): assert utility_model.name == "utility_model_test" assert utility_model.description == "utility_model_test" assert utility_model.code == "utility_model_test" - assert utility_model.inputs == [UtilityModelInput(name="originCode", description="", type=DataType.TEXT)] + assert utility_model.inputs == [ + UtilityModelInput(name="originCode", description="The originCode input is a text", type=DataType.TEXT) + ] assert utility_model.output_examples == "output_description" @@ -136,7 +138,9 @@ def test_parse_code(): with patch("aixplain.factories.file_factory.FileFactory.upload", return_value="code_link"): code = "def main(originCode: str) -> str:\n return originCode" code_link, inputs, description = parse_code(code) - assert inputs == [UtilityModelInput(name="originCode", description="", type=DataType.TEXT)] + assert inputs == [ + UtilityModelInput(name="originCode", description="The originCode input is a text", type=DataType.TEXT) + ] assert description == "" assert code_link == "code_link" @@ -152,8 +156,8 @@ def main(a: int, b: int): code = main code_link, inputs, description = parse_code(code) assert inputs == [ - UtilityModelInput(name="a", description="", type=DataType.NUMBER), - UtilityModelInput(name="b", description="", type=DataType.NUMBER), + UtilityModelInput(name="a", description="The a input is a number", type=DataType.NUMBER), + UtilityModelInput(name="b", description="The b input is a number", type=DataType.NUMBER), ] assert description == "This function adds two numbers" assert code_link == "code_link" From 4988de0953fec918513256542fc6af19a7c0aa5a Mon Sep 17 00:00:00 2001 From: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Date: Wed, 11 Dec 2024 17:47:48 -0300 Subject: [PATCH 086/105] BUG-283: Fix finetune functional tests (#336) * Fix finetune functional tests * Improvements on the functional tests --------- Co-authored-by: Thiago Castro Ferreira --- tests/functional/apikey/test_api.py | 8 ++++---- .../finetune/data/finetune_test_end2end.json | 4 ++-- .../finetune/finetune_functional_test.py | 14 ++++++++++---- tests/functional/model/run_model_test.py | 10 +++++++--- 4 files changed, 23 insertions(+), 13 deletions(-) diff --git a/tests/functional/apikey/test_api.py b/tests/functional/apikey/test_api.py index 2c228f6b..3bd17a54 100644 --- a/tests/functional/apikey/test_api.py +++ b/tests/functional/apikey/test_api.py @@ -1,6 +1,6 @@ from aixplain.factories.api_key_factory import APIKeyFactory from aixplain.modules import APIKey, APIKeyLimits, APIKeyUsageLimit -from datetime import datetime +from datetime import datetime, timedelta, timezone import json import pytest @@ -11,7 +11,7 @@ def test_create_api_key_from_json(): with open(api_key_json, "r") as file: api_key_data = json.load(file) - expires_at = datetime.strptime(api_key_data["expires_at"], "%Y-%m-%dT%H:%M:%SZ") + expires_at = (datetime.now(timezone.utc) + timedelta(weeks=4)).strftime("%Y-%m-%dT%H:%M:%SZ") api_key = APIKeyFactory.create( name=api_key_data["name"], @@ -54,7 +54,7 @@ def 
test_create_api_key_from_dict(): ], "global_limits": {"token_per_minute": 100, "token_per_day": 1000, "request_per_day": 1000, "request_per_minute": 100}, "budget": 1000, - "expires_at": "2024-12-12T00:00:00Z", + "expires_at": (datetime.now(timezone.utc) + timedelta(weeks=4)).strftime("%Y-%m-%dT%H:%M:%SZ"), } api_key_name = "Test API Key" @@ -86,7 +86,7 @@ def test_create_update_api_key_from_dict(): ], "global_limits": {"token_per_minute": 100, "token_per_day": 1000, "request_per_day": 1000, "request_per_minute": 100}, "budget": 1000, - "expires_at": "2024-12-12T00:00:00Z", + "expires_at": (datetime.now(timezone.utc) + timedelta(weeks=4)).strftime("%Y-%m-%dT%H:%M:%SZ"), } api_key_name = "Test API Key" diff --git a/tests/functional/finetune/data/finetune_test_end2end.json b/tests/functional/finetune/data/finetune_test_end2end.json index 90232a03..68499460 100644 --- a/tests/functional/finetune/data/finetune_test_end2end.json +++ b/tests/functional/finetune/data/finetune_test_end2end.json @@ -8,8 +8,8 @@ "search_metadata": false }, { - "model_name": "aiR", - "model_id": "6499cc946eb5633de15d82a1", + "model_name": "aiR v2", + "model_id": "66eae6656eb56311f2595011", "dataset_name": "Test search dataset", "inference_data": "Hello!", "required_dev": false, diff --git a/tests/functional/finetune/finetune_functional_test.py b/tests/functional/finetune/finetune_functional_test.py index 46520137..f3a85498 100644 --- a/tests/functional/finetune/finetune_functional_test.py +++ b/tests/functional/finetune/finetune_functional_test.py @@ -59,7 +59,6 @@ def pytest_generate_tests(metafunc): if "input_map" in metafunc.fixturenames: four_weeks_ago = datetime.now(timezone.utc) - timedelta(weeks=4) models = ModelFactory.list(function=Function.TEXT_GENERATION, is_finetunable=True)["results"] - recent_models = [ { "model_name": model.name, @@ -70,10 +69,17 @@ def pytest_generate_tests(metafunc): "search_metadata": False, } for model in models - if model.created_at is not None and model.created_at >= four_weeks_ago + if model.created_at is not None + and model.created_at >= four_weeks_ago + and "aiXplain-testing" not in str(model.supplier) ] - recent_models += read_data(RUN_FILE) - metafunc.parametrize("input_map", recent_models) + + run_file_models = read_data(RUN_FILE) + for model_data in run_file_models: + if not any(rm["model_id"] == model_data["model_id"] for rm in recent_models): + recent_models.append(model_data) + model_ids = [model["model_id"] for model in recent_models] + metafunc.parametrize("input_map", recent_models, ids=model_ids) def test_end2end(input_map): diff --git a/tests/functional/model/run_model_test.py b/tests/functional/model/run_model_test.py index 04335d19..dae11dea 100644 --- a/tests/functional/model/run_model_test.py +++ b/tests/functional/model/run_model_test.py @@ -21,9 +21,14 @@ def pytest_generate_tests(metafunc): if m.name == predefined_model and "aiXplain-testing" not in str(m.supplier) ] ) - recent_models = [model for model in models if model.created_at and model.created_at >= four_weeks_ago] + recent_models = [ + model + for model in models + if model.created_at and model.created_at >= four_weeks_ago and "aiXplain-testing" not in str(model.supplier) + ] combined_models = recent_models + predefined_models - metafunc.parametrize("llm_model", combined_models) + model_ids = [model.id for model in combined_models] + metafunc.parametrize("llm_model", combined_models, ids=model_ids) def test_llm_run(llm_model): @@ -35,7 +40,6 @@ def test_llm_run(llm_model): history=[{"role": "user", 
"content": "Hello! My name is Thiago."}, {"role": "assistant", "content": "Hello!"}], ) assert response["status"] == "SUCCESS" - assert "thiago" in response["data"].lower() def test_run_async(): From cc92516185bb78d199dcdf64d3d30d904d8d25cb Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Thu, 12 Dec 2024 10:19:33 -0300 Subject: [PATCH 087/105] Improvements in functional tests and set default temperature on LLMs (#337) --- aixplain/factories/model_factory/utils.py | 6 +++++- aixplain/modules/model/llm_model.py | 14 ++++++++------ .../general_assets/asset_functional_test.py | 4 ++-- .../data/asset_run_test_data.json | 4 ++++ tests/functional/pipelines/run_test.py | 17 +++++++++++++++++ 5 files changed, 36 insertions(+), 9 deletions(-) diff --git a/aixplain/factories/model_factory/utils.py b/aixplain/factories/model_factory/utils.py index 01423795..d745885c 100644 --- a/aixplain/factories/model_factory/utils.py +++ b/aixplain/factories/model_factory/utils.py @@ -31,10 +31,13 @@ def create_model_from_response(response: Dict) -> Model: parameters[param["name"]] = [w["value"] for w in param["values"]] function = Function(response["function"]["id"]) - inputs = [] + inputs, temperature = [], None ModelClass = Model if function == Function.TEXT_GENERATION: ModelClass = LLM + f = [p for p in response.get("params", []) if p["name"] == "temperature"] + if len(f) > 0 and len(f[0].get("defaultValues", [])) > 0: + temperature = float(f[0]["defaultValues"][0]["value"]) elif function == Function.UTILITIES: ModelClass = UtilityModel inputs = [ @@ -67,6 +70,7 @@ def create_model_from_response(response: Dict) -> Model: is_subscribed=True if "subscription" in response else False, version=response["version"]["id"], inputs=inputs, + temperature=temperature, ) diff --git a/aixplain/modules/model/llm_model.py b/aixplain/modules/model/llm_model.py index 1f64f246..cf60d0a2 100644 --- a/aixplain/modules/model/llm_model.py +++ b/aixplain/modules/model/llm_model.py @@ -61,6 +61,7 @@ def __init__( function: Optional[Function] = None, is_subscribed: bool = False, cost: Optional[Dict] = None, + temperature: float = 0.001, **additional_info, ) -> None: """LLM Init @@ -92,6 +93,7 @@ def __init__( ) self.url = config.MODELS_RUN_URL self.backend_url = config.BACKEND_URL + self.temperature = temperature def run( self, @@ -99,7 +101,7 @@ def run( context: Optional[Text] = None, prompt: Optional[Text] = None, history: Optional[List[Dict]] = None, - temperature: float = 0.001, + temperature: Optional[float] = None, max_tokens: int = 128, top_p: float = 1.0, name: Text = "model_process", @@ -114,7 +116,7 @@ def run( context (Optional[Text], optional): System message. Defaults to None. prompt (Optional[Text], optional): Prompt Message which comes on the left side of the last utterance. Defaults to None. history (Optional[List[Dict]], optional): Conversation history in OpenAI format ([{ "role": "assistant", "content": "Hello, world!"}]). Defaults to None. - temperature (float, optional): LLM temperature. Defaults to 0.001. + temperature (Optional[float], optional): LLM temperature. Defaults to None. max_tokens (int, optional): Maximum Generation Tokens. Defaults to 128. top_p (float, optional): Top P. Defaults to 1.0. name (Text, optional): ID given to a call. Defaults to "model_process". 
@@ -135,7 +137,7 @@ def run( parameters.setdefault("context", context) parameters.setdefault("prompt", prompt) parameters.setdefault("history", history) - parameters.setdefault("temperature", temperature) + parameters.setdefault("temperature", temperature if temperature is not None else self.temperature) parameters.setdefault("max_tokens", max_tokens) parameters.setdefault("top_p", top_p) @@ -173,7 +175,7 @@ def run_async( context: Optional[Text] = None, prompt: Optional[Text] = None, history: Optional[List[Dict]] = None, - temperature: float = 0.001, + temperature: Optional[float] = None, max_tokens: int = 128, top_p: float = 1.0, name: Text = "model_process", @@ -186,7 +188,7 @@ def run_async( context (Optional[Text], optional): System message. Defaults to None. prompt (Optional[Text], optional): Prompt Message which comes on the left side of the last utterance. Defaults to None. history (Optional[List[Dict]], optional): Conversation history in OpenAI format ([{ "role": "assistant", "content": "Hello, world!"}]). Defaults to None. - temperature (float, optional): LLM temperature. Defaults to 0.001. + temperature (Optional[float], optional): LLM temperature. Defaults to None. max_tokens (int, optional): Maximum Generation Tokens. Defaults to 128. top_p (float, optional): Top P. Defaults to 1.0. name (Text, optional): ID given to a call. Defaults to "model_process". @@ -206,7 +208,7 @@ def run_async( parameters.setdefault("context", context) parameters.setdefault("prompt", prompt) parameters.setdefault("history", history) - parameters.setdefault("temperature", temperature) + parameters.setdefault("temperature", temperature if temperature is not None else self.temperature) parameters.setdefault("max_tokens", max_tokens) parameters.setdefault("top_p", top_p) payload = build_payload(data=data, parameters=parameters) diff --git a/tests/functional/general_assets/asset_functional_test.py b/tests/functional/general_assets/asset_functional_test.py index 266b04ea..a826ad19 100644 --- a/tests/functional/general_assets/asset_functional_test.py +++ b/tests/functional/general_assets/asset_functional_test.py @@ -19,7 +19,7 @@ def inputs(): def __get_asset_factory(asset_name): - if asset_name == "model": + if "model" in asset_name: AssetFactory = ModelFactory elif asset_name == "dataset": AssetFactory = DatasetFactory @@ -40,7 +40,7 @@ def test_list(asset_name): assert asset_list["page_total"] == len(asset_list["results"]) -@pytest.mark.parametrize("asset_name", ["model", "pipeline", "metric"]) +@pytest.mark.parametrize("asset_name", ["model", "model2", "model3", "pipeline", "metric"]) def test_run(inputs, asset_name): asset_details = inputs[asset_name] AssetFactory = __get_asset_factory(asset_name) diff --git a/tests/functional/general_assets/data/asset_run_test_data.json b/tests/functional/general_assets/data/asset_run_test_data.json index e24df1ef..c9db273d 100644 --- a/tests/functional/general_assets/data/asset_run_test_data.json +++ b/tests/functional/general_assets/data/asset_run_test_data.json @@ -7,6 +7,10 @@ "id" : "60ddefab8d38c51c5885ee38", "data": "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/myname.mp3" }, + "model3" : { + "id" : "6736411cf127849667606689", + "data": "How to cook a shrimp risotto?" + }, "pipeline": { "name": "SingleNodePipeline", "data": "This is a test sentence." 
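A minimal sketch of the temperature resolution this patch introduces, for
illustration only: the model id, name, and default value below are placeholder
assumptions rather than values from the change, and actually calling run()
needs a valid TEAM_API_KEY. The point is the precedence order: an entry in the
`parameters` dict wins, then an explicit per-call `temperature`, then the
model-level default set at construction time, which replaces the old
hard-coded 0.001.

    from aixplain.modules.model.llm_model import LLM

    # Model-level default, e.g. parsed from the supplier's "temperature"
    # parameter by create_model_from_response (id/name are placeholders)
    llm = LLM(id="<model-id>", name="example-llm", temperature=0.7)

    llm.run("Hello!")                   # falls back to llm.temperature (0.7)
    llm.run("Hello!", temperature=0.2)  # per-call value takes precedence
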
diff --git a/tests/functional/pipelines/run_test.py b/tests/functional/pipelines/run_test.py index 6ca9e6fe..985e4a91 100644 --- a/tests/functional/pipelines/run_test.py +++ b/tests/functional/pipelines/run_test.py @@ -251,3 +251,20 @@ def test_run_script(version: str): assert response["status"] == "SUCCESS" data = response["data"][0]["segments"][0]["response"] assert data.startswith("SCRIPT MODIFIED:") + + +@pytest.mark.parametrize("version", ["2.0", "3.0"]) +def test_run_text_reconstruction(version: str): + pipeline = PipelineFactory.list(query="Text Reconstruction - DO NOT DELETE")["results"][0] + response = pipeline.run("Segment A\nSegment B\nSegment C", **{"version": version}) + + assert response["status"] == "SUCCESS" + labels = [d["label"] for d in response["data"]] + assert "Audio (Direct)" in labels + assert "Audio (Text Reconstruction)" in labels + assert "Audio (Audio Reconstruction)" in labels + assert "Text Reconstruction" in labels + + for d in response["data"]: + assert len(d["segments"]) > 0 + assert d["segments"][0]["success"] is True From ef17efdf3cb4e9b5ea420d53ec21a93feb785a32 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Fri, 13 Dec 2024 17:33:14 -0300 Subject: [PATCH 088/105] BUG-288: Input parameters of utility models (#339) * Input parameters of utility models * Fix validation of utility models input --- aixplain/factories/model_factory/utils.py | 13 +++++++------ aixplain/modules/model/utility_model.py | 7 +++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/aixplain/factories/model_factory/utils.py b/aixplain/factories/model_factory/utils.py index d745885c..5a8d1503 100644 --- a/aixplain/factories/model_factory/utils.py +++ b/aixplain/factories/model_factory/utils.py @@ -30,7 +30,12 @@ def create_model_from_response(response: Dict) -> Model: if "language" in param["name"]: parameters[param["name"]] = [w["value"] for w in param["values"]] - function = Function(response["function"]["id"]) + function_id = response["function"]["id"] + function = Function(function_id) + function_io = FunctionInputOutput.get(function_id, None) + input_params = {param["code"]: param for param in function_io["spec"]["params"]} + output_params = {param["code"]: param for param in function_io["spec"]["output"]} + inputs, temperature = [], None ModelClass = Model if function == Function.TEXT_GENERATION: @@ -44,15 +49,11 @@ def create_model_from_response(response: Dict) -> Model: UtilityModelInput(name=param["name"], description=param.get("description", ""), type=DataType(param["dataType"])) for param in response["params"] ] + input_params = {param["name"]: param for param in response["params"]} created_at = None if "createdAt" in response and response["createdAt"]: created_at = datetime.fromisoformat(response["createdAt"].replace("Z", "+00:00")) - function_id = response["function"]["id"] - function = Function(function_id) - function_io = FunctionInputOutput.get(function_id, None) - input_params = {param["code"]: param for param in function_io["spec"]["params"]} - output_params = {param["code"]: param for param in function_io["spec"]["output"]} return ModelClass( response["id"], diff --git a/aixplain/modules/model/utility_model.py b/aixplain/modules/model/utility_model.py index a9fe514e..f3f597ef 100644 --- a/aixplain/modules/model/utility_model.py +++ b/aixplain/modules/model/utility_model.py @@ -35,10 +35,7 @@ class UtilityModelInput: description: Text type: DataType = DataType.TEXT - def 
__post_init__(self):
-        self.validate_type()
-
-    def validate_type(self):
+    def validate(self):
         if self.type not in [DataType.TEXT, DataType.BOOLEAN, DataType.NUMBER]:
             raise ValueError("Utility Model Input type must be TEXT, BOOLEAN or NUMBER")

@@ -124,6 +121,8 @@ def validate(self):
             self.description = description
         if len(self.inputs) == 0:
             self.inputs = inputs
+        for input in self.inputs:
+            input.validate()
         assert self.name and self.name.strip() != "", "Name is required"
         assert self.description and self.description.strip() != "", "Description is required"
         assert self.code and self.code.strip() != "", "Code is required"
From 125527b94d3f0407d85415ec12b2dc872dcf4e68 Mon Sep 17 00:00:00 2001
From: kadirpekel
Date: Thu, 19 Dec 2024 18:59:32 +0100
Subject: [PATCH 089/105] ENG-1235 Utility functions now have special treatment
 as they need dyn… (#344)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* ENG-1235 Utility functions now have special treatment, as they need dynamic param population from the node spec

* Setting default values as parameters

* ENG-1235 Utility functions now have a special pipeline method, `utility`

* ENG-1235 Only input params are taken from the node spec for utility functions

* ENG-1235 minor

---------

Co-authored-by: Thiago Castro Ferreira
---
 aixplain/factories/model_factory/utils.py      |    4 +
 aixplain/modules/pipeline/designer/nodes.py    |   82 +-
 aixplain/modules/pipeline/designer/pipeline.py |   48 +-
 aixplain/modules/pipeline/generate.py          |   14 +-
 aixplain/modules/pipeline/pipeline.py          | 4544 ++++++++--------
 5 files changed, 2324 insertions(+), 2368 deletions(-)

diff --git a/aixplain/factories/model_factory/utils.py b/aixplain/factories/model_factory/utils.py
index 5a8d1503..daa1f0db 100644
--- a/aixplain/factories/model_factory/utils.py
+++ b/aixplain/factories/model_factory/utils.py
@@ -29,6 +29,10 @@ def create_model_from_response(response: Dict) -> Model:
     for param in response["params"]:
         if "language" in param["name"]:
             parameters[param["name"]] = [w["value"] for w in param["values"]]
+        else:
+            values = [w["value"] for w in param["defaultValues"]]
+            if len(values) > 0:
+                parameters[param["name"]] = values

     function_id = response["function"]["id"]
     function = Function(function_id)
diff --git a/aixplain/modules/pipeline/designer/nodes.py b/aixplain/modules/pipeline/designer/nodes.py
index 70ff302f..7e6e1803 100644
--- a/aixplain/modules/pipeline/designer/nodes.py
+++ b/aixplain/modules/pipeline/designer/nodes.py
@@ -1,15 +1,9 @@
 from typing import List, Union, Type, TYPE_CHECKING, Optional

 from aixplain.modules import Model
-from aixplain.enums import DataType
-
-from .enums import (
-    NodeType,
-    FunctionType,
-    RouteType,
-    Operation,
-    AssetType,
-)
+from aixplain.enums import DataType, Function
+
+from .enums import NodeType, FunctionType, RouteType, Operation, AssetType
 from .base import (
     Node,
     Link,
@@ -85,7 +79,15 @@ def populate_asset(self):

         if self.function:
             if self.asset.function.value != self.function:
-                raise ValueError(f"Function {self.function} is not supported by asset {self.asset_id}")  # noqa
+                raise ValueError(
+                    f"Function {self.function} is not supported by asset {self.asset_id}"
+                )
+
+            # Even though the function field has been set, we should still
+            # dynamically populate parameters for Utility functions
+            if self.function == Function.UTILITIES:
+                self._auto_populate_params()
+
         else:
             self.function = self.asset.function.value
             self._auto_populate_params()
@@ -95,13 +97,24 @@ def populate_asset(self):

     def _auto_populate_params(self):
         from aixplain.enums.function import FunctionInputOutput

-        spec = FunctionInputOutput[self.asset.function.value]["spec"]
-        for item in spec["params"]:
-            self.inputs.create_param(
-                code=item["code"],
-                data_type=item["dataType"],
-                is_required=item["required"],
-            )
+        spec = FunctionInputOutput[self.function]["spec"]
+
+        # When the node is a utility, we need to create its input parameters
+        # dynamically by referring to the node's own data.
+        if self.function == Function.UTILITIES:
+            for param in self.asset.input_params.values():
+                self.inputs.create_param(
+                    code=param["name"],
+                    data_type=param["dataType"],
+                    is_required=param["required"],
+                )
+        else:
+            for item in spec["params"]:
+                self.inputs.create_param(
+                    code=item["code"],
+                    data_type=item["dataType"],
+                    is_required=item["required"],
+                )

         for item in spec["output"]:
             self.outputs.create_param(
@@ -111,6 +124,9 @@ def _auto_populate_params(self):

     def _auto_set_params(self):
         for k, v in self.asset.additional_info["parameters"].items():
+            if k not in self.inputs:
+                continue
+
             if isinstance(v, list):
                 self.inputs[k] = v[0]
             else:
@@ -140,6 +156,11 @@ class BareAsset(AssetNode[BareAssetInputs, BareAssetOutputs]):
     pass


+class Utility(AssetNode[BareAssetInputs, BareAssetOutputs]):
+
+    function = "utilities"
+
+
 class InputInputs(Inputs):
     pass


@@ -217,7 +238,12 @@ class Output(Node[OutputInputs, OutputOutputs]):
     inputs_class: Type[TI] = OutputInputs
     outputs_class: Type[TO] = OutputOutputs

-    def __init__(self, data_types: Optional[List[DataType]] = None, pipeline: "DesignerPipeline" = None, **kwargs):
+    def __init__(
+        self,
+        data_types: Optional[List[DataType]] = None,
+        pipeline: "DesignerPipeline" = None,
+        **kwargs
+    ):
         super().__init__(pipeline=pipeline, **kwargs)
         self.data_types = data_types or []

@@ -278,7 +304,14 @@ class Route(Serializable):
     operation: Operation
     type: RouteType

-    def __init__(self, value: DataType, path: List[Union[Node, int]], operation: Operation, type: RouteType, **kwargs):
+    def __init__(
+        self,
+        value: DataType,
+        path: List[Union[Node, int]],
+        operation: Operation,
+        type: RouteType,
+        **kwargs
+    ):
         """
         Post init method to convert the nodes to node numbers if they are nodes.
@@ -294,8 +327,7 @@ def __init__(self, value: DataType, path: List[Union[Node, int]], operation: Ope # convert nodes to node numbers if they are nodes self.path = [ - node.number if isinstance(node, Node) else node - for node in self.path + node.number if isinstance(node, Node) else node for node in self.path ] def serialize(self) -> dict: @@ -334,7 +366,9 @@ class Router(Node[RouterInputs, RouterOutputs], LinkableMixin): inputs_class: Type[TI] = RouterInputs outputs_class: Type[TO] = RouterOutputs - def __init__(self, routes: List[Route], pipeline: "DesignerPipeline" = None, **kwargs): + def __init__( + self, routes: List[Route], pipeline: "DesignerPipeline" = None, **kwargs + ): super().__init__(pipeline=pipeline, **kwargs) self.routes = routes @@ -373,7 +407,9 @@ class Decision(Node[DecisionInputs, DecisionOutputs], LinkableMixin): inputs_class: Type[TI] = DecisionInputs outputs_class: Type[TO] = DecisionOutputs - def __init__(self, routes: List[Route], pipeline: "DesignerPipeline" = None, **kwargs): + def __init__( + self, routes: List[Route], pipeline: "DesignerPipeline" = None, **kwargs + ): super().__init__(pipeline=pipeline, **kwargs) self.routes = routes diff --git a/aixplain/modules/pipeline/designer/pipeline.py b/aixplain/modules/pipeline/designer/pipeline.py index 79013590..58c46112 100644 --- a/aixplain/modules/pipeline/designer/pipeline.py +++ b/aixplain/modules/pipeline/designer/pipeline.py @@ -3,7 +3,19 @@ from aixplain.enums import DataType from .base import Serializable, Node, Link -from .nodes import AssetNode, Decision, Script, Input, Output, Router, Route, BareReconstructor, BareSegmentor, BareMetric +from .nodes import ( + AssetNode, + Utility, + Decision, + Script, + Input, + Output, + Router, + Route, + BareReconstructor, + BareSegmentor, + BareMetric, +) from .enums import NodeType, RouteType, Operation from .mixins import OutputableMixin from .utils import find_prompt_params @@ -141,7 +153,9 @@ def special_prompt_validation(self, node: Node): node.inputs.text.is_required = False for match in matches: if match not in node.inputs: - raise ValueError(f"Param {match} of node {node.label} should be defined and set") + raise ValueError( + f"Param {match} of node {node.label} should be defined and set" + ) def validate_params(self): """ @@ -153,7 +167,9 @@ def validate_params(self): self.special_prompt_validation(node) for param in node.inputs: if param.is_required and not self.is_param_set(node, param): - raise ValueError(f"Param {param.code} of node {node.label} is required") + raise ValueError( + f"Param {param.code} of node {node.label} is required" + ) def validate(self): """ @@ -179,7 +195,11 @@ def get_link(self, from_node: int, to_node: int) -> Link: :return: the link """ return next( - (link for link in self.links if link.from_node == from_node and link.to_node == to_node), + ( + link + for link in self.links + if link.from_node == from_node and link.to_node == to_node + ), None, ) @@ -225,7 +245,9 @@ def infer_data_type(node): infer_data_type(self) infer_data_type(to_node) - def asset(self, asset_id: str, *args, asset_class: Type[T] = AssetNode, **kwargs) -> T: + def asset( + self, asset_id: str, *args, asset_class: Type[T] = AssetNode, **kwargs + ) -> T: """ Shortcut to create an asset node for the current pipeline. 
@@ -225,7 +245,9 @@ def infer_data_type(node):
         infer_data_type(self)
         infer_data_type(to_node)

-    def asset(self, asset_id: str, *args, asset_class: Type[T] = AssetNode, **kwargs) -> T:
+    def asset(
+        self, asset_id: str, *args, asset_class: Type[T] = AssetNode, **kwargs
+    ) -> T:
         """
         Shortcut to create an asset node for the current pipeline.
         All params will be passed as keyword arguments to the node
@@ -236,6 +258,22 @@ def asset(self, asset_id: str, *args, asset_class: Type[T] = AssetNode, **kwargs
         """
         return asset_class(asset_id, *args, pipeline=self, **kwargs)

+    def utility(
+        self, asset_id: str, *args, asset_class: Type[T] = Utility, **kwargs
+    ) -> T:
+        """
+        Shortcut to create a utility node for the current pipeline.
+        All params will be passed as keyword arguments to the node
+        constructor.
+
+        :param kwargs: keyword arguments
+        :return: the node
+        """
+        if not issubclass(asset_class, Utility):
+            raise ValueError("`asset_class` should be a subclass of the `Utility` class")
+
+        return asset_class(asset_id, *args, pipeline=self, **kwargs)
+
     def decision(self, *args, **kwargs) -> Decision:
         """
         Shortcut to create a decision node for the current pipeline.
diff --git a/aixplain/modules/pipeline/generate.py b/aixplain/modules/pipeline/generate.py
index 8bfeecb3..eeb36412 100644
--- a/aixplain/modules/pipeline/generate.py
+++ b/aixplain/modules/pipeline/generate.py
@@ -5,6 +5,7 @@
 from jinja2 import Environment, BaseLoader

 from aixplain.utils import config
+from aixplain.enums import Function

 SEGMENTOR_FUNCTIONS = [
     "split-on-linebreak",
@@ -143,9 +144,16 @@ def populate_specs(functions: list):
     """
     function_class_specs = []
     for function in functions:
+        # Utility functions have dynamic input parameters, so they are not
+        # subject to static class generation
+        if function["id"] == Function.UTILITIES:
+            continue
+
         # slugify function name by trimming some special chars and
         # transforming it to snake case
-        function_name = function["id"].replace("-", "_").replace("(", "_").replace(")", "_")
+        function_name = (
+            function["id"].replace("-", "_").replace("(", "_").replace(")", "_")
+        )
         base_class = "AssetNode"
         is_segmentor = function["id"] in SEGMENTOR_FUNCTIONS
         is_reconstructor = function["id"] in RECONSTRUCTOR_FUNCTIONS
@@ -153,7 +161,9 @@ def populate_specs(functions: list):
             base_class = "BaseSegmentor"
         elif is_reconstructor:
             base_class = "BaseReconstructor"
-        elif "metric" in function_name.split("_"):  # noqa: Advise a better distinguisher please
+        elif "metric" in function_name.split(
+            "_"
+        ):  # noqa: Advise a better distinguisher please
             base_class = "BaseMetric"

         spec = {
diff --git a/aixplain/modules/pipeline/pipeline.py b/aixplain/modules/pipeline/pipeline.py
index bf67ff15..27091770 100644
--- a/aixplain/modules/pipeline/pipeline.py
+++ b/aixplain/modules/pipeline/pipeline.py
@@ -4,7 +4,18 @@
 from typing import Union, Type

 from aixplain.enums import DataType
-from .designer import InputParam, OutputParam, Inputs, Outputs, TI, TO, AssetNode, BaseReconstructor, BaseSegmentor, BaseMetric
+from .designer import (
+    InputParam,
+    OutputParam,
+    Inputs,
+    Outputs,
+    TI,
+    TO,
+    AssetNode,
+    BaseReconstructor,
+    BaseSegmentor,
+    BaseMetric
+)
 from .default import DefaultPipeline
 from aixplain.modules import asset

@@ -27,14 +38,13 @@ def __init__(self, node=None):

 class ObjectDetection(AssetNode[ObjectDetectionInputs, ObjectDetectionOutputs]):
     """
-    Object Detection is a computer vision technology that identifies and locates
-    objects within an image, typically by drawing bounding boxes around the
-    detected objects and classifying them into predefined categories.
+    Object Detection is a computer vision technology that identifies and locates
+objects within an image, typically by drawing bounding boxes around the
+detected objects and classifying them into predefined categories.
- InputType: video - OutputType: text + InputType: video + OutputType: text """ - function: str = "object-detection" input_type: str = DataType.VIDEO output_type: str = DataType.TEXT @@ -61,13 +71,12 @@ def __init__(self, node=None): class LanguageIdentification(AssetNode[LanguageIdentificationInputs, LanguageIdentificationOutputs]): """ - Language Identification is the process of automatically determining the - language in which a given piece of text is written. + Detects the language in which a given text is written, aiding in multilingual +platforms or content localization. - InputType: text - OutputType: text + InputType: text + OutputType: text """ - function: str = "language-identification" input_type: str = DataType.TEXT output_type: str = DataType.TEXT @@ -76,17 +85,17 @@ class LanguageIdentification(AssetNode[LanguageIdentificationInputs, LanguageIde outputs_class: Type[TO] = LanguageIdentificationOutputs -class OcrInputs(Inputs): +class DepthEstimationInputs(Inputs): + language: InputParam = None image: InputParam = None - featuretypes: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) - self.featuretypes = self.create_param(code="featuretypes", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) -class OcrOutputs(Outputs): +class DepthEstimationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -94,23 +103,21 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class Ocr(AssetNode[OcrInputs, OcrOutputs]): +class DepthEstimation(AssetNode[DepthEstimationInputs, DepthEstimationOutputs]): """ - OCR, or Optical Character Recognition, is a technology that converts different - types of documents, such as scanned paper documents, PDFs, or images captured - by a digital camera, into editable and searchable data by recognizing and - extracting text from the images. + Depth estimation is a computational process that determines the distance of +objects from a viewpoint, typically using visual data from cameras or sensors +to create a three-dimensional understanding of a scene. - InputType: image - OutputType: text + InputType: image + OutputType: text """ - - function: str = "ocr" + function: str = "depth-estimation" input_type: str = DataType.IMAGE output_type: str = DataType.TEXT - inputs_class: Type[TI] = OcrInputs - outputs_class: Type[TO] = OcrOutputs + inputs_class: Type[TI] = DepthEstimationInputs + outputs_class: Type[TO] = DepthEstimationOutputs class ScriptExecutionInputs(Inputs): @@ -131,14 +138,13 @@ def __init__(self, node=None): class ScriptExecution(AssetNode[ScriptExecutionInputs, ScriptExecutionOutputs]): """ - Script Execution refers to the process of running a set of programmed - instructions or code within a computing environment, enabling the automated - performance of tasks, calculations, or operations as defined by the script. + Script Execution refers to the process of running a set of programmed +instructions or code within a computing environment, enabling the automated +performance of tasks, calculations, or operations as defined by the script. 
- InputType: text - OutputType: text + InputType: text + OutputType: text """ - function: str = "script-execution" input_type: str = DataType.TEXT output_type: str = DataType.TEXT @@ -147,157 +153,157 @@ class ScriptExecution(AssetNode[ScriptExecutionInputs, ScriptExecutionOutputs]): outputs_class: Type[TO] = ScriptExecutionOutputs -class ImageLabelDetectionInputs(Inputs): +class ImageEmbeddingInputs(Inputs): + language: InputParam = None image: InputParam = None - min_confidence: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) - self.min_confidence = self.create_param(code="min_confidence", data_type=DataType.TEXT, is_required=False) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) -class ImageLabelDetectionOutputs(Outputs): +class ImageEmbeddingOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class ImageLabelDetection(AssetNode[ImageLabelDetectionInputs, ImageLabelDetectionOutputs]): +class ImageEmbedding(AssetNode[ImageEmbeddingInputs, ImageEmbeddingOutputs]): """ - Image Label Detection is a function that automatically identifies and assigns - descriptive tags or labels to objects, scenes, or elements within an image, - enabling easier categorization, search, and analysis of visual content. + Image Embedding is a process that transforms an image into a fixed-dimensional +vector representation, capturing its essential features and enabling efficient +comparison, retrieval, and analysis in various machine learning and computer +vision tasks. 
- InputType: image - OutputType: label + InputType: image + OutputType: text """ - - function: str = "image-label-detection" + function: str = "image-embedding" input_type: str = DataType.IMAGE - output_type: str = DataType.LABEL + output_type: str = DataType.TEXT - inputs_class: Type[TI] = ImageLabelDetectionInputs - outputs_class: Type[TO] = ImageLabelDetectionOutputs + inputs_class: Type[TI] = ImageEmbeddingInputs + outputs_class: Type[TO] = ImageEmbeddingOutputs -class ImageCaptioningInputs(Inputs): +class ImageToVideoGenerationInputs(Inputs): + language: InputParam = None image: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) -class ImageCaptioningOutputs(Outputs): +class ImageToVideoGenerationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.VIDEO) -class ImageCaptioning(AssetNode[ImageCaptioningInputs, ImageCaptioningOutputs]): +class ImageToVideoGeneration(AssetNode[ImageToVideoGenerationInputs, ImageToVideoGenerationOutputs]): """ - Image Captioning is a process that involves generating a textual description of - an image, typically using machine learning models to analyze the visual content - and produce coherent and contextually relevant sentences that describe the - objects, actions, and scenes depicted in the image. + The Image To Video Generation function transforms a series of static images +into a cohesive, dynamic video sequence, often incorporating transitions, +effects, and synchronization with audio to create a visually engaging +narrative. - InputType: image - OutputType: text + InputType: image + OutputType: video """ - - function: str = "image-captioning" + function: str = "image-to-video-generation" input_type: str = DataType.IMAGE - output_type: str = DataType.TEXT + output_type: str = DataType.VIDEO - inputs_class: Type[TI] = ImageCaptioningInputs - outputs_class: Type[TO] = ImageCaptioningOutputs + inputs_class: Type[TI] = ImageToVideoGenerationInputs + outputs_class: Type[TO] = ImageToVideoGenerationOutputs -class AudioLanguageIdentificationInputs(Inputs): - audio: InputParam = None +class ImageImpaintingInputs(Inputs): + image: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) -class AudioLanguageIdentificationOutputs(Outputs): - data: OutputParam = None +class ImageImpaintingOutputs(Outputs): + image: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.image = self.create_param(code="image", data_type=DataType.IMAGE) -class AudioLanguageIdentification(AssetNode[AudioLanguageIdentificationInputs, AudioLanguageIdentificationOutputs]): +class ImageImpainting(AssetNode[ImageImpaintingInputs, ImageImpaintingOutputs]): """ - Audio Language Identification is a process that involves analyzing an audio - recording to determine the language being spoken. 
+ Image inpainting is a process that involves filling in missing or damaged parts +of an image in a way that is visually coherent and seamlessly blends with the +surrounding areas, often using advanced algorithms and techniques to restore +the image to its original or intended appearance. - InputType: audio - OutputType: label + InputType: image + OutputType: image """ + function: str = "image-impainting" + input_type: str = DataType.IMAGE + output_type: str = DataType.IMAGE - function: str = "audio-language-identification" - input_type: str = DataType.AUDIO - output_type: str = DataType.LABEL - - inputs_class: Type[TI] = AudioLanguageIdentificationInputs - outputs_class: Type[TO] = AudioLanguageIdentificationOutputs + inputs_class: Type[TI] = ImageImpaintingInputs + outputs_class: Type[TO] = ImageImpaintingOutputs -class AsrAgeClassificationInputs(Inputs): - source_audio: InputParam = None +class StyleTransferInputs(Inputs): + image: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) -class AsrAgeClassificationOutputs(Outputs): - data: OutputParam = None +class StyleTransferOutputs(Outputs): + image: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.image = self.create_param(code="image", data_type=DataType.IMAGE) -class AsrAgeClassification(AssetNode[AsrAgeClassificationInputs, AsrAgeClassificationOutputs]): +class StyleTransfer(AssetNode[StyleTransferInputs, StyleTransferOutputs]): """ - The ASR Age Classification function is designed to analyze audio recordings of - speech to determine the speaker's age group by leveraging automatic speech - recognition (ASR) technology and machine learning algorithms. + Style Transfer is a technique in artificial intelligence that applies the +visual style of one image (such as the brushstrokes of a famous painting) to +the content of another image, effectively blending the artistic elements of the +first image with the subject matter of the second. 
- InputType: audio - OutputType: label + InputType: image + OutputType: image """ + function: str = "style-transfer" + input_type: str = DataType.IMAGE + output_type: str = DataType.IMAGE - function: str = "asr-age-classification" - input_type: str = DataType.AUDIO - output_type: str = DataType.LABEL - - inputs_class: Type[TI] = AsrAgeClassificationInputs - outputs_class: Type[TO] = AsrAgeClassificationOutputs + inputs_class: Type[TI] = StyleTransferInputs + outputs_class: Type[TO] = StyleTransferOutputs -class BenchmarkScoringMtInputs(Inputs): - input: InputParam = None - text: InputParam = None +class MultiClassTextClassificationInputs(Inputs): + language: InputParam = None text: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.input = self.create_param(code="input", data_type=DataType.TEXT, is_required=True) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=False) -class BenchmarkScoringMtOutputs(Outputs): +class MultiClassTextClassificationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -305,33 +311,34 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class BenchmarkScoringMt(AssetNode[BenchmarkScoringMtInputs, BenchmarkScoringMtOutputs]): +class MultiClassTextClassification(AssetNode[MultiClassTextClassificationInputs, MultiClassTextClassificationOutputs]): """ - Benchmark Scoring MT is a function designed to evaluate and score machine - translation systems by comparing their output against a set of predefined - benchmarks, thereby assessing their accuracy and performance. + Multi Class Text Classification is a natural language processing task that +involves categorizing a given text into one of several predefined classes or +categories based on its content. 
- InputType: text - OutputType: label + InputType: text + OutputType: label """ - - function: str = "benchmark-scoring-mt" + function: str = "multi-class-text-classification" input_type: str = DataType.TEXT output_type: str = DataType.LABEL - inputs_class: Type[TI] = BenchmarkScoringMtInputs - outputs_class: Type[TO] = BenchmarkScoringMtOutputs + inputs_class: Type[TI] = MultiClassTextClassificationInputs + outputs_class: Type[TO] = MultiClassTextClassificationOutputs -class AsrGenderClassificationInputs(Inputs): - source_audio: InputParam = None +class PartOfSpeechTaggingInputs(Inputs): + language: InputParam = None + text: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=False) -class AsrGenderClassificationOutputs(Outputs): +class PartOfSpeechTaggingOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -339,34 +346,33 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class AsrGenderClassification(AssetNode[AsrGenderClassificationInputs, AsrGenderClassificationOutputs]): +class PartOfSpeechTagging(AssetNode[PartOfSpeechTaggingInputs, PartOfSpeechTaggingOutputs]): """ - The ASR Gender Classification function analyzes audio recordings to determine - and classify the speaker's gender based on their voice characteristics. + Part of Speech Tagging is a natural language processing task that involves +assigning each word in a sentence its corresponding part of speech, such as +noun, verb, adjective, or adverb, based on its role and context within the +sentence. - InputType: audio - OutputType: label + InputType: text + OutputType: label """ - - function: str = "asr-gender-classification" - input_type: str = DataType.AUDIO + function: str = "part-of-speech-tagging" + input_type: str = DataType.TEXT output_type: str = DataType.LABEL - inputs_class: Type[TI] = AsrGenderClassificationInputs - outputs_class: Type[TO] = AsrGenderClassificationOutputs + inputs_class: Type[TI] = PartOfSpeechTaggingInputs + outputs_class: Type[TO] = PartOfSpeechTaggingOutputs -class BaseModelInputs(Inputs): - language: InputParam = None +class MetricAggregationInputs(Inputs): text: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) -class BaseModelOutputs(Outputs): +class MetricAggregationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -374,331 +380,382 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class BaseModel(AssetNode[BaseModelInputs, BaseModelOutputs]): +class MetricAggregation(BaseMetric[MetricAggregationInputs, MetricAggregationOutputs]): """ - The Base-Model function serves as a foundational framework designed to provide - essential features and capabilities upon which more specialized or advanced - models can be built and customized. 
+ Metric Aggregation is a function that computes and summarizes numerical data by +applying statistical operations, such as averaging, summing, or finding the +minimum and maximum values, to provide insights and facilitate analysis of +large datasets. - InputType: text - OutputType: text + InputType: text + OutputType: text """ - - function: str = "base-model" + function: str = "metric-aggregation" input_type: str = DataType.TEXT output_type: str = DataType.TEXT - inputs_class: Type[TI] = BaseModelInputs - outputs_class: Type[TO] = BaseModelOutputs + inputs_class: Type[TI] = MetricAggregationInputs + outputs_class: Type[TO] = MetricAggregationOutputs -class LanguageIdentificationAudioInputs(Inputs): - audio: InputParam = None +class ImageColorizationInputs(Inputs): + image: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) -class LanguageIdentificationAudioOutputs(Outputs): - data: OutputParam = None +class ImageColorizationOutputs(Outputs): + image: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.image = self.create_param(code="image", data_type=DataType.IMAGE) -class LanguageIdentificationAudio(AssetNode[LanguageIdentificationAudioInputs, LanguageIdentificationAudioOutputs]): +class ImageColorization(AssetNode[ImageColorizationInputs, ImageColorizationOutputs]): """ - The Language Identification Audio function analyzes audio input to determine - and identify the language being spoken. + Image colorization is a process that involves adding color to grayscale images, +transforming them from black-and-white to full-color representations, often +using advanced algorithms and machine learning techniques to predict and apply +the appropriate hues and shades. 
- InputType: audio - OutputType: label + InputType: image + OutputType: image """ + function: str = "image-colorization" + input_type: str = DataType.IMAGE + output_type: str = DataType.IMAGE - function: str = "language-identification-audio" - input_type: str = DataType.AUDIO - output_type: str = DataType.LABEL - - inputs_class: Type[TI] = LanguageIdentificationAudioInputs - outputs_class: Type[TO] = LanguageIdentificationAudioOutputs + inputs_class: Type[TI] = ImageColorizationInputs + outputs_class: Type[TO] = ImageColorizationOutputs -class LoglikelihoodInputs(Inputs): +class IntentClassificationInputs(Inputs): + language: InputParam = None text: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=False) -class LoglikelihoodOutputs(Outputs): +class IntentClassificationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.NUMBER) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class Loglikelihood(AssetNode[LoglikelihoodInputs, LoglikelihoodOutputs]): +class IntentClassification(AssetNode[IntentClassificationInputs, IntentClassificationOutputs]): """ - The Log Likelihood function measures the probability of observing the given - data under a specific statistical model by taking the natural logarithm of the - likelihood function, thereby transforming the product of probabilities into a - sum, which simplifies the process of optimization and parameter estimation. + Intent Classification is a natural language processing task that involves +analyzing and categorizing user text input to determine the underlying purpose +or goal behind the communication, such as booking a flight, asking for weather +information, or setting a reminder. 
- InputType: text - OutputType: number + InputType: text + OutputType: label """ - - function: str = "loglikelihood" + function: str = "intent-classification" input_type: str = DataType.TEXT - output_type: str = DataType.NUMBER + output_type: str = DataType.LABEL - inputs_class: Type[TI] = LoglikelihoodInputs - outputs_class: Type[TO] = LoglikelihoodOutputs + inputs_class: Type[TI] = IntentClassificationInputs + outputs_class: Type[TO] = IntentClassificationOutputs -class VideoEmbeddingInputs(Inputs): - language: InputParam = None - video: InputParam = None +class AudioIntentDetectionInputs(Inputs): + audio: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=False) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=False) -class VideoEmbeddingOutputs(Outputs): +class AudioIntentDetectionOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.EMBEDDING) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class VideoEmbedding(AssetNode[VideoEmbeddingInputs, VideoEmbeddingOutputs]): +class AudioIntentDetection(AssetNode[AudioIntentDetectionInputs, AudioIntentDetectionOutputs]): """ - Video Embedding is a process that transforms video content into a fixed- - dimensional vector representation, capturing essential features and patterns to - facilitate tasks such as retrieval, classification, and recommendation. + Audio Intent Detection is a process that involves analyzing audio signals to +identify and interpret the underlying intentions or purposes behind spoken +words, enabling systems to understand and respond appropriately to human +speech. 
- InputType: video - OutputType: embedding + InputType: audio + OutputType: label """ + function: str = "audio-intent-detection" + input_type: str = DataType.AUDIO + output_type: str = DataType.LABEL - function: str = "video-embedding" - input_type: str = DataType.VIDEO - output_type: str = DataType.EMBEDDING - - inputs_class: Type[TI] = VideoEmbeddingInputs - outputs_class: Type[TO] = VideoEmbeddingOutputs + inputs_class: Type[TI] = AudioIntentDetectionInputs + outputs_class: Type[TO] = AudioIntentDetectionOutputs -class TextSegmenationInputs(Inputs): +class AsrQualityEstimationInputs(Inputs): text: InputParam = None - language: InputParam = None + script: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class TextSegmenationOutputs(Outputs): +class AsrQualityEstimationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class TextSegmenation(AssetNode[TextSegmenationInputs, TextSegmenationOutputs]): +class AsrQualityEstimation(AssetNode[AsrQualityEstimationInputs, AsrQualityEstimationOutputs]): """ - Text Segmentation is the process of dividing a continuous text into meaningful - units, such as words, sentences, or topics, to facilitate easier analysis and - understanding. + ASR Quality Estimation is a process that evaluates the accuracy and reliability +of automatic speech recognition systems by analyzing their performance in +transcribing spoken language into text. - InputType: text - OutputType: text + InputType: text + OutputType: label """ + function: str = "asr-quality-estimation" + input_type: str = DataType.TEXT + output_type: str = DataType.LABEL - function: str = "text-segmenation" + inputs_class: Type[TI] = AsrQualityEstimationInputs + outputs_class: Type[TO] = AsrQualityEstimationOutputs + + +class SearchInputs(Inputs): + text: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + + +class SearchOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class Search(AssetNode[SearchInputs, SearchOutputs]): + """ + An algorithm that identifies and returns data or items that match particular +keywords or conditions from a dataset. A fundamental tool for databases and +websites. 
+ + InputType: text + OutputType: text + """ + function: str = "search" input_type: str = DataType.TEXT output_type: str = DataType.TEXT - inputs_class: Type[TI] = TextSegmenationInputs - outputs_class: Type[TO] = TextSegmenationOutputs + inputs_class: Type[TI] = SearchInputs + outputs_class: Type[TO] = SearchOutputs -class ImageEmbeddingInputs(Inputs): +class VisemeGenerationInputs(Inputs): + text: InputParam = None language: InputParam = None - image: InputParam = None + dialect: InputParam = None + script: InputParam = None def __init__(self, node=None): super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class ImageEmbeddingOutputs(Outputs): +class VisemeGenerationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class ImageEmbedding(AssetNode[ImageEmbeddingInputs, ImageEmbeddingOutputs]): +class VisemeGeneration(AssetNode[VisemeGenerationInputs, VisemeGenerationOutputs]): """ - Image Embedding is a process that transforms an image into a fixed-dimensional - vector representation, capturing its essential features and enabling efficient - comparison, retrieval, and analysis in various machine learning and computer - vision tasks. + Viseme Generation is the process of creating visual representations of +phonemes, which are the distinct units of sound in speech, to synchronize lip +movements with spoken words in animations or virtual avatars. 
- InputType: image - OutputType: text + InputType: text + OutputType: label """ + function: str = "viseme-generation" + input_type: str = DataType.TEXT + output_type: str = DataType.LABEL - function: str = "image-embedding" - input_type: str = DataType.IMAGE - output_type: str = DataType.TEXT - - inputs_class: Type[TI] = ImageEmbeddingInputs - outputs_class: Type[TO] = ImageEmbeddingOutputs + inputs_class: Type[TI] = VisemeGenerationInputs + outputs_class: Type[TO] = VisemeGenerationOutputs -class ImageManipulationInputs(Inputs): +class OcrInputs(Inputs): image: InputParam = None - targetimage: InputParam = None + featuretypes: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) - self.targetimage = self.create_param(code="targetimage", data_type=DataType.IMAGE, is_required=True) + self.featuretypes = self.create_param(code="featuretypes", data_type=DataType.TEXT, is_required=True) -class ImageManipulationOutputs(Outputs): - image: OutputParam = None +class OcrOutputs(Outputs): + data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class ImageManipulation(AssetNode[ImageManipulationInputs, ImageManipulationOutputs]): +class Ocr(AssetNode[OcrInputs, OcrOutputs]): """ - Image Manipulation refers to the process of altering or enhancing digital - images using various techniques and tools to achieve desired visual effects, - correct imperfections, or transform the image's appearance. + Converts images of typed, handwritten, or printed text into machine-encoded +text. Used in digitizing printed texts for data retrieval. - InputType: image - OutputType: image + InputType: image + OutputType: text """ - - function: str = "image-manipulation" + function: str = "ocr" input_type: str = DataType.IMAGE - output_type: str = DataType.IMAGE + output_type: str = DataType.TEXT - inputs_class: Type[TI] = ImageManipulationInputs - outputs_class: Type[TO] = ImageManipulationOutputs + inputs_class: Type[TI] = OcrInputs + outputs_class: Type[TO] = OcrOutputs -class ImageToVideoGenerationInputs(Inputs): +class LoglikelihoodInputs(Inputs): + text: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + + +class LoglikelihoodOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.NUMBER) + + +class Loglikelihood(AssetNode[LoglikelihoodInputs, LoglikelihoodOutputs]): + """ + The Log Likelihood function measures the probability of observing the given +data under a specific statistical model by taking the natural logarithm of the +likelihood function, thereby transforming the product of probabilities into a +sum, which simplifies the process of optimization and parameter estimation. 
+ + InputType: text + OutputType: number + """ + function: str = "loglikelihood" + input_type: str = DataType.TEXT + output_type: str = DataType.NUMBER + + inputs_class: Type[TI] = LoglikelihoodInputs + outputs_class: Type[TO] = LoglikelihoodOutputs + + +class VideoEmbeddingInputs(Inputs): language: InputParam = None - image: InputParam = None + video: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=False) -class ImageToVideoGenerationOutputs(Outputs): +class VideoEmbeddingOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.VIDEO) + self.data = self.create_param(code="data", data_type=DataType.EMBEDDING) -class ImageToVideoGeneration(AssetNode[ImageToVideoGenerationInputs, ImageToVideoGenerationOutputs]): +class VideoEmbedding(AssetNode[VideoEmbeddingInputs, VideoEmbeddingOutputs]): """ - The Image To Video Generation function transforms a series of static images - into a cohesive, dynamic video sequence, often incorporating transitions, - effects, and synchronization with audio to create a visually engaging - narrative. + Video Embedding is a process that transforms video content into a fixed- +dimensional vector representation, capturing essential features and patterns to +facilitate tasks such as retrieval, classification, and recommendation. - InputType: image - OutputType: video + InputType: video + OutputType: embedding """ + function: str = "video-embedding" + input_type: str = DataType.VIDEO + output_type: str = DataType.EMBEDDING - function: str = "image-to-video-generation" - input_type: str = DataType.IMAGE - output_type: str = DataType.VIDEO - - inputs_class: Type[TI] = ImageToVideoGenerationInputs - outputs_class: Type[TO] = ImageToVideoGenerationOutputs + inputs_class: Type[TI] = VideoEmbeddingInputs + outputs_class: Type[TO] = VideoEmbeddingOutputs -class AudioForcedAlignmentInputs(Inputs): - audio: InputParam = None +class TextSegmenationInputs(Inputs): text: InputParam = None language: InputParam = None - dialect: InputParam = None - script: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) - self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class AudioForcedAlignmentOutputs(Outputs): - text: OutputParam = None - audio: OutputParam = None +class TextSegmenationOutputs(Outputs): + data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class AudioForcedAlignment(AssetNode[AudioForcedAlignmentInputs, AudioForcedAlignmentOutputs]): +class TextSegmenation(AssetNode[TextSegmenationInputs, 
TextSegmenationOutputs]): """ - Audio Forced Alignment is a process that synchronizes a given audio recording - with its corresponding transcript by precisely aligning each spoken word or - phoneme to its exact timing within the audio. + Text Segmentation is the process of dividing a continuous text into meaningful +units, such as words, sentences, or topics, to facilitate easier analysis and +understanding. - InputType: audio - OutputType: audio + InputType: text + OutputType: text """ + function: str = "text-segmenation" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT - function: str = "audio-forced-alignment" - input_type: str = DataType.AUDIO - output_type: str = DataType.AUDIO - - inputs_class: Type[TI] = AudioForcedAlignmentInputs - outputs_class: Type[TO] = AudioForcedAlignmentOutputs + inputs_class: Type[TI] = TextSegmenationInputs + outputs_class: Type[TO] = TextSegmenationOutputs -class BenchmarkScoringAsrInputs(Inputs): - input: InputParam = None - text: InputParam = None - text: InputParam = None +class ExpressionDetectionInputs(Inputs): + media: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.input = self.create_param(code="input", data_type=DataType.AUDIO, is_required=True) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.media = self.create_param(code="media", data_type=DataType.IMAGE, is_required=True) -class BenchmarkScoringAsrOutputs(Outputs): +class ExpressionDetectionOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -706,215 +763,208 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class BenchmarkScoringAsr(AssetNode[BenchmarkScoringAsrInputs, BenchmarkScoringAsrOutputs]): +class ExpressionDetection(AssetNode[ExpressionDetectionInputs, ExpressionDetectionOutputs]): """ - Benchmark Scoring ASR is a function that evaluates and compares the performance - of automatic speech recognition systems by analyzing their accuracy, speed, and - other relevant metrics against a standardized set of benchmarks. + Expression Detection is the process of identifying and analyzing facial +expressions to interpret emotions or intentions using AI and computer vision +techniques. 
- InputType: audio - OutputType: label + InputType: text + OutputType: label """ - - function: str = "benchmark-scoring-asr" - input_type: str = DataType.AUDIO + function: str = "expression-detection" + input_type: str = DataType.TEXT output_type: str = DataType.LABEL - inputs_class: Type[TI] = BenchmarkScoringAsrInputs - outputs_class: Type[TO] = BenchmarkScoringAsrOutputs + inputs_class: Type[TI] = ExpressionDetectionInputs + outputs_class: Type[TO] = ExpressionDetectionOutputs -class VisualQuestionAnsweringInputs(Inputs): - text: InputParam = None +class SpeechClassificationInputs(Inputs): + audio: InputParam = None language: InputParam = None - image: InputParam = None + script: InputParam = None + dialect: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) -class VisualQuestionAnsweringOutputs(Outputs): +class SpeechClassificationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class VisualQuestionAnswering(AssetNode[VisualQuestionAnsweringInputs, VisualQuestionAnsweringOutputs]): +class SpeechClassification(AssetNode[SpeechClassificationInputs, SpeechClassificationOutputs]): """ - Visual Question Answering (VQA) is a task in artificial intelligence that - involves analyzing an image and providing accurate, contextually relevant - answers to questions posed about the visual content of that image. + Categorizes audio clips based on their content, aiding in content organization +and targeted actions. 
- InputType: image - OutputType: video + InputType: audio + OutputType: label """ + function: str = "speech-classification" + input_type: str = DataType.AUDIO + output_type: str = DataType.LABEL - function: str = "visual-question-answering" - input_type: str = DataType.IMAGE - output_type: str = DataType.VIDEO - - inputs_class: Type[TI] = VisualQuestionAnsweringInputs - outputs_class: Type[TO] = VisualQuestionAnsweringOutputs + inputs_class: Type[TI] = SpeechClassificationInputs + outputs_class: Type[TO] = SpeechClassificationOutputs -class DocumentImageParsingInputs(Inputs): - image: InputParam = None +class InverseTextNormalizationInputs(Inputs): + text: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=False) -class DocumentImageParsingOutputs(Outputs): +class InverseTextNormalizationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class DocumentImageParsing(AssetNode[DocumentImageParsingInputs, DocumentImageParsingOutputs]): +class InverseTextNormalization(AssetNode[InverseTextNormalizationInputs, InverseTextNormalizationOutputs]): """ - Document Image Parsing is the process of analyzing and converting scanned or - photographed images of documents into structured, machine-readable formats by - identifying and extracting text, layout, and other relevant information. + Inverse Text Normalization is the process of converting spoken or written +language in its normalized form, such as numbers, dates, and abbreviations, +back into their original, more complex or detailed textual representations. 
- InputType: image - OutputType: text + InputType: text + OutputType: label """ + function: str = "inverse-text-normalization" + input_type: str = DataType.TEXT + output_type: str = DataType.LABEL - function: str = "document-image-parsing" - input_type: str = DataType.IMAGE - output_type: str = DataType.TEXT - - inputs_class: Type[TI] = DocumentImageParsingInputs - outputs_class: Type[TO] = DocumentImageParsingOutputs + inputs_class: Type[TI] = InverseTextNormalizationInputs + outputs_class: Type[TO] = InverseTextNormalizationOutputs -class DocumentInformationExtractionInputs(Inputs): - image: InputParam = None +class ExtractAudioFromVideoInputs(Inputs): + video: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=True) -class DocumentInformationExtractionOutputs(Outputs): +class ExtractAudioFromVideoOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.AUDIO) -class DocumentInformationExtraction(AssetNode[DocumentInformationExtractionInputs, DocumentInformationExtractionOutputs]): +class ExtractAudioFromVideo(AssetNode[ExtractAudioFromVideoInputs, ExtractAudioFromVideoOutputs]): """ - Document Information Extraction is the process of automatically identifying, - extracting, and structuring relevant data from unstructured or semi-structured - documents, such as invoices, receipts, contracts, and forms, to facilitate - easier data management and analysis. + Isolates and extracts audio tracks from video files, aiding in audio analysis +or transcription tasks. 
- InputType: image - OutputType: text + InputType: video + OutputType: audio """ + function: str = "extract-audio-from-video" + input_type: str = DataType.VIDEO + output_type: str = DataType.AUDIO - function: str = "document-information-extraction" - input_type: str = DataType.IMAGE - output_type: str = DataType.TEXT - - inputs_class: Type[TI] = DocumentInformationExtractionInputs - outputs_class: Type[TO] = DocumentInformationExtractionOutputs + inputs_class: Type[TI] = ExtractAudioFromVideoInputs + outputs_class: Type[TO] = ExtractAudioFromVideoOutputs -class DepthEstimationInputs(Inputs): - language: InputParam = None +class ImageCompressionInputs(Inputs): image: InputParam = None + apl_qfactor: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) + self.apl_qfactor = self.create_param(code="apl_qfactor", data_type=DataType.TEXT, is_required=False) -class DepthEstimationOutputs(Outputs): - data: OutputParam = None +class ImageCompressionOutputs(Outputs): + image: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.image = self.create_param(code="image", data_type=DataType.IMAGE) -class DepthEstimation(AssetNode[DepthEstimationInputs, DepthEstimationOutputs]): +class ImageCompression(AssetNode[ImageCompressionInputs, ImageCompressionOutputs]): """ - Depth estimation is a computational process that determines the distance of - objects from a viewpoint, typically using visual data from cameras or sensors - to create a three-dimensional understanding of a scene. + Reduces the size of image files without significantly compromising their visual +quality. Useful for optimizing storage and improving webpage load times. 
- InputType: image - OutputType: text + InputType: image + OutputType: image """ - - function: str = "depth-estimation" + function: str = "image-compression" input_type: str = DataType.IMAGE - output_type: str = DataType.TEXT + output_type: str = DataType.IMAGE - inputs_class: Type[TI] = DepthEstimationInputs - outputs_class: Type[TO] = DepthEstimationOutputs + inputs_class: Type[TI] = ImageCompressionInputs + outputs_class: Type[TO] = ImageCompressionOutputs -class VideoGenerationInputs(Inputs): - text: InputParam = None +class NoiseRemovalInputs(Inputs): + audio: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=False) -class VideoGenerationOutputs(Outputs): +class NoiseRemovalOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.VIDEO) + self.data = self.create_param(code="data", data_type=DataType.AUDIO) -class VideoGeneration(AssetNode[VideoGenerationInputs, VideoGenerationOutputs]): +class NoiseRemoval(AssetNode[NoiseRemovalInputs, NoiseRemovalOutputs]): """ - Video Generation is the process of creating video content through automated or - semi-automated means, often utilizing algorithms, artificial intelligence, or - software tools to produce visual and audio elements that can range from simple - animations to complex, realistic scenes. + Noise Removal is a process that involves identifying and eliminating unwanted +random variations or disturbances from an audio signal to enhance the clarity +and quality of the underlying information. 
- InputType: text - OutputType: video + InputType: audio + OutputType: audio """ + function: str = "noise-removal" + input_type: str = DataType.AUDIO + output_type: str = DataType.AUDIO - function: str = "video-generation" - input_type: str = DataType.TEXT - output_type: str = DataType.VIDEO - - inputs_class: Type[TI] = VideoGenerationInputs - outputs_class: Type[TO] = VideoGenerationOutputs + inputs_class: Type[TI] = NoiseRemovalInputs + outputs_class: Type[TO] = NoiseRemovalOutputs -class ReferencelessAudioGenerationMetricInputs(Inputs): - hypotheses: InputParam = None - sources: InputParam = None - score_identifier: InputParam = None +class TextSummarizationInputs(Inputs): + text: InputParam = None + language: InputParam = None + script: InputParam = None + dialect: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.AUDIO, is_required=True) - self.sources = self.create_param(code="sources", data_type=DataType.AUDIO, is_required=False) - self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) -class ReferencelessAudioGenerationMetricOutputs(Outputs): +class TextSummarizationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -922,103 +972,109 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class ReferencelessAudioGenerationMetric( - BaseMetric[ReferencelessAudioGenerationMetricInputs, ReferencelessAudioGenerationMetricOutputs] -): +class TextSummarization(AssetNode[TextSummarizationInputs, TextSummarizationOutputs]): """ - The Referenceless Audio Generation Metric is a tool designed to evaluate the - quality of generated audio content without the need for a reference or original - audio sample for comparison. + Extracts the main points from a larger body of text, producing a concise +summary without losing the primary message. 
- InputType: text - OutputType: text + InputType: text + OutputType: text """ - - function: str = "referenceless-audio-generation-metric" + function: str = "text-summarization" input_type: str = DataType.TEXT output_type: str = DataType.TEXT - inputs_class: Type[TI] = ReferencelessAudioGenerationMetricInputs - outputs_class: Type[TO] = ReferencelessAudioGenerationMetricOutputs + inputs_class: Type[TI] = TextSummarizationInputs + outputs_class: Type[TO] = TextSummarizationOutputs -class MultiClassImageClassificationInputs(Inputs): - image: InputParam = None +class TextGenerationMetricInputs(Inputs): + hypotheses: InputParam = None + references: InputParam = None + sources: InputParam = None + score_identifier: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.TEXT, is_required=True) + self.references = self.create_param(code="references", data_type=DataType.TEXT, is_required=False) + self.sources = self.create_param(code="sources", data_type=DataType.TEXT, is_required=False) + self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True) -class MultiClassImageClassificationOutputs(Outputs): +class TextGenerationMetricOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class MultiClassImageClassification(AssetNode[MultiClassImageClassificationInputs, MultiClassImageClassificationOutputs]): +class TextGenerationMetric(BaseMetric[TextGenerationMetricInputs, TextGenerationMetricOutputs]): """ - Multi Class Image Classification is a machine learning task where an algorithm - is trained to categorize images into one of several predefined classes or - categories based on their visual content. + A Text Generation Metric is a quantitative measure used to evaluate the quality +and effectiveness of text produced by natural language processing models, often +assessing aspects such as coherence, relevance, fluency, and adherence to given +prompts or instructions. 
- InputType: image - OutputType: label + InputType: text + OutputType: text """ + function: str = "text-generation-metric" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT - function: str = "multi-class-image-classification" - input_type: str = DataType.IMAGE - output_type: str = DataType.LABEL - - inputs_class: Type[TI] = MultiClassImageClassificationInputs - outputs_class: Type[TO] = MultiClassImageClassificationOutputs + inputs_class: Type[TI] = TextGenerationMetricInputs + outputs_class: Type[TO] = TextGenerationMetricOutputs -class SemanticSegmentationInputs(Inputs): +class ImageCaptioningInputs(Inputs): image: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) -class SemanticSegmentationOutputs(Outputs): +class ImageCaptioningOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class SemanticSegmentation(AssetNode[SemanticSegmentationInputs, SemanticSegmentationOutputs]): +class ImageCaptioning(AssetNode[ImageCaptioningInputs, ImageCaptioningOutputs]): """ - Semantic segmentation is a computer vision process that involves classifying - each pixel in an image into a predefined category, effectively partitioning the - image into meaningful segments based on the objects or regions they represent. + Image Captioning is a process that involves generating a textual description of +an image, typically using machine learning models to analyze the visual content +and produce coherent and contextually relevant sentences that describe the +objects, actions, and scenes depicted in the image. 
- InputType: image - OutputType: label + InputType: image + OutputType: text """ - - function: str = "semantic-segmentation" + function: str = "image-captioning" input_type: str = DataType.IMAGE - output_type: str = DataType.LABEL + output_type: str = DataType.TEXT - inputs_class: Type[TI] = SemanticSegmentationInputs - outputs_class: Type[TO] = SemanticSegmentationOutputs + inputs_class: Type[TI] = ImageCaptioningInputs + outputs_class: Type[TO] = ImageCaptioningOutputs -class InstanceSegmentationInputs(Inputs): - image: InputParam = None +class BenchmarkScoringMtInputs(Inputs): + input: InputParam = None + text: InputParam = None + text: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + self.input = self.create_param(code="input", data_type=DataType.TEXT, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) -class InstanceSegmentationOutputs(Outputs): +class BenchmarkScoringMtOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -1026,137 +1082,138 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class InstanceSegmentation(AssetNode[InstanceSegmentationInputs, InstanceSegmentationOutputs]): +class BenchmarkScoringMt(AssetNode[BenchmarkScoringMtInputs, BenchmarkScoringMtOutputs]): """ - Instance segmentation is a computer vision task that involves detecting and - delineating each distinct object within an image, assigning a unique label and - precise boundary to every individual instance of objects, even if they belong - to the same category. + Benchmark Scoring MT is a function designed to evaluate and score machine +translation systems by comparing their output against a set of predefined +benchmarks, thereby assessing their accuracy and performance. 
- InputType: image - OutputType: label + InputType: text + OutputType: label """ - - function: str = "instance-segmentation" - input_type: str = DataType.IMAGE + function: str = "benchmark-scoring-mt" + input_type: str = DataType.TEXT output_type: str = DataType.LABEL - inputs_class: Type[TI] = InstanceSegmentationInputs - outputs_class: Type[TO] = InstanceSegmentationOutputs + inputs_class: Type[TI] = BenchmarkScoringMtInputs + outputs_class: Type[TO] = BenchmarkScoringMtOutputs -class ImageColorizationInputs(Inputs): - image: InputParam = None +class SpeakerDiarizationAudioInputs(Inputs): + audio: InputParam = None + language: InputParam = None + script: InputParam = None + dialect: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) -class ImageColorizationOutputs(Outputs): - image: OutputParam = None +class SpeakerDiarizationAudioOutputs(Outputs): + data: OutputParam = None + audio: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO) -class ImageColorization(AssetNode[ImageColorizationInputs, ImageColorizationOutputs]): +class SpeakerDiarizationAudio(BaseSegmentor[SpeakerDiarizationAudioInputs, SpeakerDiarizationAudioOutputs]): """ - Image colorization is a process that involves adding color to grayscale images, - transforming them from black-and-white to full-color representations, often - using advanced algorithms and machine learning techniques to predict and apply - the appropriate hues and shades. + Identifies individual speakers and their respective speech segments within an +audio clip. Ideal for multi-speaker recordings or conference calls. 
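# Illustrative sketch, not part of this patch: the required/optional split that
# SpeakerDiarizationAudioInputs encodes, assuming param objects keep the keyword
# arguments passed to create_param and the bases tolerate node=None.
ins = SpeakerDiarizationAudioInputs()
hints = [ins.language, ins.script, ins.dialect]
assert ins.audio.is_required                      # only the audio is mandatory
assert not any(p.is_required for p in hints)      # language/script/dialect are hints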
-    InputType: image
-    OutputType: image
+    InputType: audio
+    OutputType: label
     """
+    function: str = "speaker-diarization-audio"
+    input_type: str = DataType.AUDIO
+    output_type: str = DataType.LABEL

-    function: str = "image-colorization"
-    input_type: str = DataType.IMAGE
-    output_type: str = DataType.IMAGE
-
-    inputs_class: Type[TI] = ImageColorizationInputs
-    outputs_class: Type[TO] = ImageColorizationOutputs
+    inputs_class: Type[TI] = SpeakerDiarizationAudioInputs
+    outputs_class: Type[TO] = SpeakerDiarizationAudioOutputs


-class AudioGenerationMetricInputs(Inputs):
-    hypotheses: InputParam = None
-    references: InputParam = None
-    sources: InputParam = None
-    score_identifier: InputParam = None
+class BenchmarkScoringAsrInputs(Inputs):
+    input: InputParam = None
+    text: InputParam = None

     def __init__(self, node=None):
         super().__init__(node=node)
-        self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.AUDIO, is_required=True)
-        self.references = self.create_param(code="references", data_type=DataType.AUDIO, is_required=False)
-        self.sources = self.create_param(code="sources", data_type=DataType.TEXT, is_required=False)
-        self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True)
+        self.input = self.create_param(code="input", data_type=DataType.AUDIO, is_required=True)
+        self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True)


-class AudioGenerationMetricOutputs(Outputs):
+class BenchmarkScoringAsrOutputs(Outputs):
     data: OutputParam = None

     def __init__(self, node=None):
         super().__init__(node=node)
-        self.data = self.create_param(code="data", data_type=DataType.TEXT)
+        self.data = self.create_param(code="data", data_type=DataType.LABEL)


-class AudioGenerationMetric(BaseMetric[AudioGenerationMetricInputs, AudioGenerationMetricOutputs]):
+class BenchmarkScoringAsr(AssetNode[BenchmarkScoringAsrInputs, BenchmarkScoringAsrOutputs]):
     """
-    The Audio Generation Metric is a quantitative measure used to evaluate the
-    quality, accuracy, and overall performance of audio generated by artificial
-    intelligence systems, often considering factors such as fidelity,
-    intelligibility, and similarity to human-produced audio.
+    Benchmark Scoring ASR is a function that evaluates and compares the performance
+of automatic speech recognition systems by analyzing their accuracy, speed, and
+other relevant metrics against a standardized set of benchmarks.
- InputType: text - OutputType: text + InputType: audio + OutputType: label """ + function: str = "benchmark-scoring-asr" + input_type: str = DataType.AUDIO + output_type: str = DataType.LABEL - function: str = "audio-generation-metric" - input_type: str = DataType.TEXT - output_type: str = DataType.TEXT - - inputs_class: Type[TI] = AudioGenerationMetricInputs - outputs_class: Type[TO] = AudioGenerationMetricOutputs + inputs_class: Type[TI] = BenchmarkScoringAsrInputs + outputs_class: Type[TO] = BenchmarkScoringAsrOutputs -class ImageImpaintingInputs(Inputs): +class VisualQuestionAnsweringInputs(Inputs): + text: InputParam = None + language: InputParam = None image: InputParam = None def __init__(self, node=None): super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) -class ImageImpaintingOutputs(Outputs): - image: OutputParam = None +class VisualQuestionAnsweringOutputs(Outputs): + data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class ImageImpainting(AssetNode[ImageImpaintingInputs, ImageImpaintingOutputs]): +class VisualQuestionAnswering(AssetNode[VisualQuestionAnsweringInputs, VisualQuestionAnsweringOutputs]): """ - Image inpainting is a process that involves filling in missing or damaged parts - of an image in a way that is visually coherent and seamlessly blends with the - surrounding areas, often using advanced algorithms and techniques to restore - the image to its original or intended appearance. + Visual Question Answering (VQA) is a task in artificial intelligence that +involves analyzing an image and providing accurate, contextually relevant +answers to questions posed about the visual content of that image. 
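# Illustrative sketch, not part of this patch: the VisualQuestionAnswering node
# pairs a required question ("text") and language with an optional image, and
# the answer comes back as text on the "data" output. Assumes param objects
# expose the is_required flag they were created with.
ins = VisualQuestionAnsweringInputs()
assert ins.text.is_required and ins.language.is_required
assert not ins.image.is_required                  # the image itself is optional here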
-    InputType: image
-    OutputType: image
+    InputType: image
+    OutputType: text
     """
-
-    function: str = "image-impainting"
+    function: str = "visual-question-answering"
     input_type: str = DataType.IMAGE
-    output_type: str = DataType.IMAGE
+    output_type: str = DataType.TEXT

-    inputs_class: Type[TI] = ImageImpaintingInputs
-    outputs_class: Type[TO] = ImageImpaintingOutputs
+    inputs_class: Type[TI] = VisualQuestionAnsweringInputs
+    outputs_class: Type[TO] = VisualQuestionAnsweringOutputs


-class StyleTransferInputs(Inputs):
+class DocumentImageParsingInputs(Inputs):
     image: InputParam = None

     def __init__(self, node=None):
@@ -1164,34 +1221,32 @@ def __init__(self, node=None):
         self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False)


-class StyleTransferOutputs(Outputs):
-    image: OutputParam = None
+class DocumentImageParsingOutputs(Outputs):
+    data: OutputParam = None

     def __init__(self, node=None):
         super().__init__(node=node)
-        self.image = self.create_param(code="image", data_type=DataType.IMAGE)
+        self.data = self.create_param(code="data", data_type=DataType.TEXT)


-class StyleTransfer(AssetNode[StyleTransferInputs, StyleTransferOutputs]):
+class DocumentImageParsing(AssetNode[DocumentImageParsingInputs, DocumentImageParsingOutputs]):
     """
-    Style Transfer is a technique in artificial intelligence that applies the
-    visual style of one image (such as the brushstrokes of a famous painting) to
-    the content of another image, effectively blending the artistic elements of the
-    first image with the subject matter of the second.
+    Document Image Parsing is the process of analyzing and converting scanned or
+photographed images of documents into structured, machine-readable formats by
+identifying and extracting text, layout, and other relevant information.

-    InputType: image
-    OutputType: image
+    InputType: image
+    OutputType: text
     """
-
-    function: str = "style-transfer"
+    function: str = "document-image-parsing"
     input_type: str = DataType.IMAGE
-    output_type: str = DataType.IMAGE
+    output_type: str = DataType.TEXT

-    inputs_class: Type[TI] = StyleTransferInputs
-    outputs_class: Type[TO] = StyleTransferOutputs
+    inputs_class: Type[TI] = DocumentImageParsingInputs
+    outputs_class: Type[TO] = DocumentImageParsingOutputs


-class MultiClassTextClassificationInputs(Inputs):
+class MultiLabelTextClassificationInputs(Inputs):
     language: InputParam = None
     text: InputParam = None

@@ -1201,7 +1256,7 @@ def __init__(self, node=None):
         self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=False)


-class MultiClassTextClassificationOutputs(Outputs):
+class MultiLabelTextClassificationOutputs(Outputs):
     data: OutputParam = None

     def __init__(self, node=None):
@@ -1209,76 +1264,67 @@ def __init__(self, node=None):
         self.data = self.create_param(code="data", data_type=DataType.LABEL)


-class MultiClassTextClassification(AssetNode[MultiClassTextClassificationInputs, MultiClassTextClassificationOutputs]):
+class MultiLabelTextClassification(AssetNode[MultiLabelTextClassificationInputs, MultiLabelTextClassificationOutputs]):
     """
-    Multi Class Text Classification is a natural language processing task that
-    involves categorizing a given text into one of several predefined classes or
-    categories based on its content.
+    Multi Label Text Classification is a natural language processing task where a
+given text is analyzed and assigned multiple relevant labels or categories from
+a predefined set, allowing for the text to belong to more than one category
+simultaneously.
- InputType: text - OutputType: label + InputType: text + OutputType: label """ - - function: str = "multi-class-text-classification" + function: str = "multi-label-text-classification" input_type: str = DataType.TEXT output_type: str = DataType.LABEL - inputs_class: Type[TI] = MultiClassTextClassificationInputs - outputs_class: Type[TO] = MultiClassTextClassificationOutputs + inputs_class: Type[TI] = MultiLabelTextClassificationInputs + outputs_class: Type[TO] = MultiLabelTextClassificationOutputs -class TextEmbeddingInputs(Inputs): +class TextReconstructionInputs(Inputs): text: InputParam = None - language: InputParam = None - dialect: InputParam = None - script: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) - self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class TextEmbeddingOutputs(Outputs): +class TextReconstructionOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class TextEmbedding(AssetNode[TextEmbeddingInputs, TextEmbeddingOutputs]): +class TextReconstruction(BaseReconstructor[TextReconstructionInputs, TextReconstructionOutputs]): """ - Text embedding is a process that converts text into numerical vectors, - capturing the semantic meaning and contextual relationships of words or - phrases, enabling machines to understand and analyze natural language more - effectively. + Text Reconstruction is a process that involves piecing together fragmented or +incomplete text data to restore it to its original, coherent form. 
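# Illustrative sketch, not part of this patch: every generated node carries its
# wiring as class attributes, so generic code can describe any of them without
# instantiation. This uses only names defined in this module.
def describe(node_cls):
    """Return (function id, input DataType, output DataType) for a node class."""
    return node_cls.function, node_cls.input_type, node_cls.output_type

assert describe(TextReconstruction) == ("text-reconstruction", DataType.TEXT, DataType.TEXT)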
- InputType: text - OutputType: text + InputType: text + OutputType: text """ - - function: str = "text-embedding" + function: str = "text-reconstruction" input_type: str = DataType.TEXT output_type: str = DataType.TEXT - inputs_class: Type[TI] = TextEmbeddingInputs - outputs_class: Type[TO] = TextEmbeddingOutputs + inputs_class: Type[TI] = TextReconstructionInputs + outputs_class: Type[TO] = TextReconstructionOutputs -class MultiLabelTextClassificationInputs(Inputs): - language: InputParam = None - text: InputParam = None +class VideoContentModerationInputs(Inputs): + video: InputParam = None + min_confidence: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=False) + self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=True) + self.min_confidence = self.create_param(code="min_confidence", data_type=DataType.TEXT, is_required=False) -class MultiLabelTextClassificationOutputs(Outputs): +class VideoContentModerationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -1286,34 +1332,33 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class MultiLabelTextClassification(AssetNode[MultiLabelTextClassificationInputs, MultiLabelTextClassificationOutputs]): +class VideoContentModeration(AssetNode[VideoContentModerationInputs, VideoContentModerationOutputs]): """ - Multi Label Text Classification is a natural language processing task where a - given text is analyzed and assigned multiple relevant labels or categories from - a predefined set, allowing for the text to belong to more than one category - simultaneously. + Automatically reviews video content to detect and possibly remove inappropriate +or harmful material. Essential for user-generated content platforms. 
- InputType: text - OutputType: label + InputType: video + OutputType: label """ - - function: str = "multi-label-text-classification" - input_type: str = DataType.TEXT + function: str = "video-content-moderation" + input_type: str = DataType.VIDEO output_type: str = DataType.LABEL - inputs_class: Type[TI] = MultiLabelTextClassificationInputs - outputs_class: Type[TO] = MultiLabelTextClassificationOutputs + inputs_class: Type[TI] = VideoContentModerationInputs + outputs_class: Type[TO] = VideoContentModerationOutputs -class TextReconstructionInputs(Inputs): - text: InputParam = None +class MultilingualSpeechRecognitionInputs(Inputs): + source_audio: InputParam = None + language: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) -class TextReconstructionOutputs(Outputs): +class MultilingualSpeechRecognitionOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -1321,34 +1366,36 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class TextReconstruction(BaseReconstructor[TextReconstructionInputs, TextReconstructionOutputs]): +class MultilingualSpeechRecognition(AssetNode[MultilingualSpeechRecognitionInputs, MultilingualSpeechRecognitionOutputs]): """ - Text Reconstruction is a process that involves piecing together fragmented or - incomplete text data to restore it to its original, coherent form. + Multilingual Speech Recognition is a technology that enables the automatic +transcription of spoken language into text across multiple languages, allowing +for seamless communication and understanding in diverse linguistic contexts. 
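# Illustrative sketch, not part of this patch: inputs_class/outputs_class let a
# caller build the right parameter sets for any node generically. Assumes param
# objects keep their create_param arguments.
node_cls = MultilingualSpeechRecognition
ins, outs = node_cls.inputs_class(), node_cls.outputs_class()
assert ins.source_audio.is_required and not ins.language.is_required
assert outs.data.data_type == DataType.TEXT       # transcripts come back as text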
- InputType: text - OutputType: text + InputType: audio + OutputType: text """ - - function: str = "text-reconstruction" - input_type: str = DataType.TEXT + function: str = "multilingual-speech-recognition" + input_type: str = DataType.AUDIO output_type: str = DataType.TEXT - inputs_class: Type[TI] = TextReconstructionInputs - outputs_class: Type[TO] = TextReconstructionOutputs + inputs_class: Type[TI] = MultilingualSpeechRecognitionInputs + outputs_class: Type[TO] = MultilingualSpeechRecognitionOutputs -class FactCheckingInputs(Inputs): - language: InputParam = None +class EntityLinkingInputs(Inputs): text: InputParam = None + language: InputParam = None + domain: InputParam = None def __init__(self, node=None): super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=False) + self.domain = self.create_param(code="domain", data_type=DataType.LABEL, is_required=False) -class FactCheckingOutputs(Outputs): +class EntityLinkingOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -1356,75 +1403,64 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class FactChecking(AssetNode[FactCheckingInputs, FactCheckingOutputs]): +class EntityLinking(AssetNode[EntityLinkingInputs, EntityLinkingOutputs]): """ - Fact Checking is the process of verifying the accuracy and truthfulness of - information, statements, or claims by cross-referencing with reliable sources - and evidence. + Associates identified entities in the text with specific entries in a knowledge +base or database. 
- InputType: text - OutputType: label + InputType: text + OutputType: label """ - - function: str = "fact-checking" + function: str = "entity-linking" input_type: str = DataType.TEXT output_type: str = DataType.LABEL - inputs_class: Type[TI] = FactCheckingInputs - outputs_class: Type[TO] = FactCheckingOutputs + inputs_class: Type[TI] = EntityLinkingInputs + outputs_class: Type[TO] = EntityLinkingOutputs -class SpeechClassificationInputs(Inputs): - audio: InputParam = None - language: InputParam = None - script: InputParam = None - dialect: InputParam = None +class AudioReconstructionInputs(Inputs): + audio: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) - self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) -class SpeechClassificationOutputs(Outputs): +class AudioReconstructionOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.data = self.create_param(code="data", data_type=DataType.AUDIO) -class SpeechClassification(AssetNode[SpeechClassificationInputs, SpeechClassificationOutputs]): +class AudioReconstruction(BaseReconstructor[AudioReconstructionInputs, AudioReconstructionOutputs]): """ - Speech Classification is a process that involves analyzing and categorizing - spoken language into predefined categories or classes based on various features - such as tone, pitch, and linguistic content. + Audio Reconstruction is the process of restoring or recreating audio signals +from incomplete, damaged, or degraded recordings to achieve a high-quality, +accurate representation of the original sound. 
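# Illustrative sketch, not part of this patch: AudioReconstruction is a
# reconstructor mapping required audio in to audio out, and both ends of that
# contract can be read straight off the generated classes. Assumes param
# objects keep their create_param arguments.
ins, outs = AudioReconstructionInputs(), AudioReconstructionOutputs()
assert ins.audio.is_required
assert outs.data.data_type == DataType.AUDIO      # restored audio, not labels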
- InputType: audio - OutputType: label + InputType: audio + OutputType: audio """ - - function: str = "speech-classification" + function: str = "audio-reconstruction" input_type: str = DataType.AUDIO - output_type: str = DataType.LABEL + output_type: str = DataType.AUDIO - inputs_class: Type[TI] = SpeechClassificationInputs - outputs_class: Type[TO] = SpeechClassificationOutputs + inputs_class: Type[TI] = AudioReconstructionInputs + outputs_class: Type[TO] = AudioReconstructionOutputs -class IntentClassificationInputs(Inputs): - language: InputParam = None - text: InputParam = None +class AudioEmotionDetectionInputs(Inputs): + audio: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=False) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=False) -class IntentClassificationOutputs(Outputs): +class AudioEmotionDetectionOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -1432,143 +1468,143 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class IntentClassification(AssetNode[IntentClassificationInputs, IntentClassificationOutputs]): +class AudioEmotionDetection(AssetNode[AudioEmotionDetectionInputs, AudioEmotionDetectionOutputs]): """ - Intent Classification is a natural language processing task that involves - analyzing and categorizing user text input to determine the underlying purpose - or goal behind the communication, such as booking a flight, asking for weather - information, or setting a reminder. + Audio Emotion Detection is a technology that analyzes vocal characteristics and +patterns in audio recordings to identify and classify the emotional state of +the speaker. 
- InputType: text - OutputType: label + InputType: audio + OutputType: label """ - - function: str = "intent-classification" - input_type: str = DataType.TEXT + function: str = "audio-emotion-detection" + input_type: str = DataType.AUDIO output_type: str = DataType.LABEL - inputs_class: Type[TI] = IntentClassificationInputs - outputs_class: Type[TO] = IntentClassificationOutputs + inputs_class: Type[TI] = AudioEmotionDetectionInputs + outputs_class: Type[TO] = AudioEmotionDetectionOutputs -class PartOfSpeechTaggingInputs(Inputs): - language: InputParam = None +class SplitOnLinebreakInputs(Inputs): text: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=False) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) -class PartOfSpeechTaggingOutputs(Outputs): +class SplitOnLinebreakOutputs(Outputs): data: OutputParam = None + audio: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO) -class PartOfSpeechTagging(AssetNode[PartOfSpeechTaggingInputs, PartOfSpeechTaggingOutputs]): +class SplitOnLinebreak(BaseSegmentor[SplitOnLinebreakInputs, SplitOnLinebreakOutputs]): """ - Part of Speech Tagging is a natural language processing task that involves - assigning each word in a sentence its corresponding part of speech, such as - noun, verb, adjective, or adverb, based on its role and context within the - sentence. + The "Split On Linebreak" function divides a given string into a list of +substrings, using linebreaks (newline characters) as the points of separation. - InputType: text - OutputType: label + InputType: text + OutputType: text """ - - function: str = "part-of-speech-tagging" + function: str = "split-on-linebreak" input_type: str = DataType.TEXT - output_type: str = DataType.LABEL + output_type: str = DataType.TEXT - inputs_class: Type[TI] = PartOfSpeechTaggingInputs - outputs_class: Type[TO] = PartOfSpeechTaggingOutputs + inputs_class: Type[TI] = SplitOnLinebreakInputs + outputs_class: Type[TO] = SplitOnLinebreakOutputs -class MetricAggregationInputs(Inputs): - text: InputParam = None +class KeywordSpottingInputs(Inputs): + audio: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=False) -class MetricAggregationOutputs(Outputs): +class KeywordSpottingOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class MetricAggregation(BaseMetric[MetricAggregationInputs, MetricAggregationOutputs]): +class KeywordSpotting(AssetNode[KeywordSpottingInputs, KeywordSpottingOutputs]): """ - Metric Aggregation is a function that computes and summarizes numerical data by - applying statistical operations, such as averaging, summing, or finding the - minimum and maximum values, to provide insights and facilitate analysis of - large datasets. 
+ Keyword Spotting is a function that enables the detection and identification of +specific words or phrases within a stream of audio, often used in voice- +activated systems to trigger actions or commands based on recognized keywords. - InputType: text - OutputType: text + InputType: audio + OutputType: label """ + function: str = "keyword-spotting" + input_type: str = DataType.AUDIO + output_type: str = DataType.LABEL - function: str = "metric-aggregation" - input_type: str = DataType.TEXT - output_type: str = DataType.TEXT - - inputs_class: Type[TI] = MetricAggregationInputs - outputs_class: Type[TO] = MetricAggregationOutputs + inputs_class: Type[TI] = KeywordSpottingInputs + outputs_class: Type[TO] = KeywordSpottingOutputs -class DialectDetectionInputs(Inputs): - audio: InputParam = None +class TextClassificationInputs(Inputs): + text: InputParam = None language: InputParam = None + dialect: InputParam = None + script: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class DialectDetectionOutputs(Outputs): +class TextClassificationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class DialectDetection(AssetNode[DialectDetectionInputs, DialectDetectionOutputs]): +class TextClassification(AssetNode[TextClassificationInputs, TextClassificationOutputs]): """ - Dialect Detection is a function that identifies and classifies the specific - regional or social variations of a language spoken or written by an individual, - enabling the recognition of distinct linguistic patterns and nuances associated - with different dialects. + Categorizes text into predefined groups or topics, facilitating content +organization and targeted actions. 
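# Illustrative sketch, not part of this patch: the contract of the
# SplitOnLinebreak segmentor above, pictured with plain Python. The real node
# runs server-side; this stand-in only shows the intended text behaviour.
def split_on_linebreak(text):
    return text.splitlines()                      # one segment per line

assert split_on_linebreak("first\nsecond\nthird") == ["first", "second", "third"]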
- InputType: audio - OutputType: text + InputType: text + OutputType: label """ + function: str = "text-classification" + input_type: str = DataType.TEXT + output_type: str = DataType.LABEL - function: str = "dialect-detection" - input_type: str = DataType.AUDIO - output_type: str = DataType.TEXT - - inputs_class: Type[TI] = DialectDetectionInputs - outputs_class: Type[TO] = DialectDetectionOutputs + inputs_class: Type[TI] = TextClassificationInputs + outputs_class: Type[TO] = TextClassificationOutputs -class InverseTextNormalizationInputs(Inputs): +class OffensiveLanguageIdentificationInputs(Inputs): text: InputParam = None + language: InputParam = None + dialect: InputParam = None + script: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=False) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class InverseTextNormalizationOutputs(Outputs): +class OffensiveLanguageIdentificationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -1576,64 +1612,66 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class InverseTextNormalization(AssetNode[InverseTextNormalizationInputs, InverseTextNormalizationOutputs]): +class OffensiveLanguageIdentification(AssetNode[OffensiveLanguageIdentificationInputs, OffensiveLanguageIdentificationOutputs]): """ - Inverse Text Normalization is the process of converting spoken or written - language in its normalized form, such as numbers, dates, and abbreviations, - back into their original, more complex or detailed textual representations. + Detects language or phrases that might be considered offensive, aiding in +content moderation and creating respectful user interactions. 
- InputType: text - OutputType: label + InputType: text + OutputType: label """ - - function: str = "inverse-text-normalization" + function: str = "offensive-language-identification" input_type: str = DataType.TEXT output_type: str = DataType.LABEL - inputs_class: Type[TI] = InverseTextNormalizationInputs - outputs_class: Type[TO] = InverseTextNormalizationOutputs + inputs_class: Type[TI] = OffensiveLanguageIdentificationInputs + outputs_class: Type[TO] = OffensiveLanguageIdentificationOutputs -class TextToAudioInputs(Inputs): - text: InputParam = None +class SpeechNonSpeechClassificationInputs(Inputs): + audio: InputParam = None language: InputParam = None + script: InputParam = None + dialect: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) -class TextToAudioOutputs(Outputs): +class SpeechNonSpeechClassificationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.AUDIO) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class TextToAudio(AssetNode[TextToAudioInputs, TextToAudioOutputs]): +class SpeechNonSpeechClassification(AssetNode[SpeechNonSpeechClassificationInputs, SpeechNonSpeechClassificationOutputs]): """ - The Text to Audio function converts written text into spoken words, allowing - users to listen to the content instead of reading it. + Differentiates between speech and non-speech audio segments. Great for editing +software and transcription services to exclude irrelevant audio. 
- InputType: text - OutputType: audio + InputType: audio + OutputType: label """ + function: str = "speech-non-speech-classification" + input_type: str = DataType.AUDIO + output_type: str = DataType.LABEL - function: str = "text-to-audio" - input_type: str = DataType.TEXT - output_type: str = DataType.AUDIO - - inputs_class: Type[TI] = TextToAudioInputs - outputs_class: Type[TO] = TextToAudioOutputs + inputs_class: Type[TI] = SpeechNonSpeechClassificationInputs + outputs_class: Type[TO] = SpeechNonSpeechClassificationOutputs -class FillTextMaskInputs(Inputs): +class NamedEntityRecognitionInputs(Inputs): text: InputParam = None language: InputParam = None dialect: InputParam = None script: InputParam = None + domain: InputParam = None def __init__(self, node=None): super().__init__(node=node) @@ -1641,80 +1679,78 @@ def __init__(self, node=None): self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.domain = self.create_param(code="domain", data_type=DataType.LABEL, is_required=False) -class FillTextMaskOutputs(Outputs): +class NamedEntityRecognitionOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class FillTextMask(AssetNode[FillTextMaskInputs, FillTextMaskOutputs]): +class NamedEntityRecognition(AssetNode[NamedEntityRecognitionInputs, NamedEntityRecognitionOutputs]): """ - The "Fill Text Mask" function takes a text input with masked or placeholder - characters and replaces those placeholders with specified or contextually - appropriate characters to generate a complete and coherent text output. + Identifies and classifies named entities (e.g., persons, organizations, +locations) within text. Useful for information extraction, content tagging, and +search enhancements. 
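# Illustrative sketch, not part of this patch: NamedEntityRecognitionInputs now
# carries an optional domain hint next to the usual language fields. Assumes
# param objects keep their create_param arguments.
ins = NamedEntityRecognitionInputs()
assert ins.text.is_required and ins.language.is_required
assert not ins.domain.is_required                 # domain is the new optional hint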
- InputType: text - OutputType: text + InputType: text + OutputType: label """ - - function: str = "fill-text-mask" + function: str = "named-entity-recognition" input_type: str = DataType.TEXT - output_type: str = DataType.TEXT + output_type: str = DataType.LABEL - inputs_class: Type[TI] = FillTextMaskInputs - outputs_class: Type[TO] = FillTextMaskOutputs + inputs_class: Type[TI] = NamedEntityRecognitionInputs + outputs_class: Type[TO] = NamedEntityRecognitionOutputs -class VideoContentModerationInputs(Inputs): - video: InputParam = None - min_confidence: InputParam = None +class ImageManipulationInputs(Inputs): + image: InputParam = None + targetimage: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=True) - self.min_confidence = self.create_param(code="min_confidence", data_type=DataType.TEXT, is_required=False) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) + self.targetimage = self.create_param(code="targetimage", data_type=DataType.IMAGE, is_required=True) -class VideoContentModerationOutputs(Outputs): - data: OutputParam = None +class ImageManipulationOutputs(Outputs): + image: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.image = self.create_param(code="image", data_type=DataType.IMAGE) -class VideoContentModeration(AssetNode[VideoContentModerationInputs, VideoContentModerationOutputs]): +class ImageManipulation(AssetNode[ImageManipulationInputs, ImageManipulationOutputs]): """ - Video Content Moderation is the process of reviewing, analyzing, and filtering - video content to ensure it adheres to community guidelines, legal standards, - and platform policies, thereby preventing the dissemination of inappropriate, - harmful, or illegal material. + Image Manipulation refers to the process of altering or enhancing digital +images using various techniques and tools to achieve desired visual effects, +correct imperfections, or transform the image's appearance. 
- InputType: video - OutputType: label + InputType: image + OutputType: image """ + function: str = "image-manipulation" + input_type: str = DataType.IMAGE + output_type: str = DataType.IMAGE - function: str = "video-content-moderation" - input_type: str = DataType.VIDEO - output_type: str = DataType.LABEL - - inputs_class: Type[TI] = VideoContentModerationInputs - outputs_class: Type[TO] = VideoContentModerationOutputs + inputs_class: Type[TI] = ImageManipulationInputs + outputs_class: Type[TO] = ImageManipulationOutputs -class ExtractAudioFromVideoInputs(Inputs): - video: InputParam = None +class SplitOnSilenceInputs(Inputs): + audio: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=True) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) -class ExtractAudioFromVideoOutputs(Outputs): +class SplitOnSilenceOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -1722,152 +1758,169 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.AUDIO) -class ExtractAudioFromVideo(AssetNode[ExtractAudioFromVideoInputs, ExtractAudioFromVideoOutputs]): +class SplitOnSilence(AssetNode[SplitOnSilenceInputs, SplitOnSilenceOutputs]): + """ + The "Split On Silence" function divides an audio recording into separate +segments based on periods of silence, allowing for easier editing and analysis +of individual sections. + + InputType: audio + OutputType: audio """ - The "Extract Audio From Video" function allows users to separate and save the - audio track from a video file, enabling them to obtain just the sound without - the accompanying visual content. + function: str = "split-on-silence" + input_type: str = DataType.AUDIO + output_type: str = DataType.AUDIO + + inputs_class: Type[TI] = SplitOnSilenceInputs + outputs_class: Type[TO] = SplitOnSilenceOutputs + + +class TextToVideoGenerationInputs(Inputs): + text: InputParam = None + language: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) + + +class TextToVideoGenerationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.VIDEO) + - InputType: video - OutputType: audio +class TextToVideoGeneration(AssetNode[TextToVideoGenerationInputs, TextToVideoGenerationOutputs]): """ + Text To Video Generation is a process that converts written descriptions or +scripts into dynamic, visual video content using advanced algorithms and +artificial intelligence. 
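# Illustrative sketch, not part of this patch: a naive, self-contained picture
# of what the SplitOnSilence node above does, using a list of amplitude samples
# in place of real audio. The threshold value is purely illustrative.
def split_on_silence(samples, threshold=0.01):
    segments, current = [], []
    for s in samples:
        if abs(s) > threshold:
            current.append(s)                     # inside a loud segment
        elif current:
            segments.append(current)              # silence closes the segment
            current = []
    if current:
        segments.append(current)
    return segments

assert split_on_silence([0.0, 0.5, 0.4, 0.0, 0.0, 0.3]) == [[0.5, 0.4], [0.3]]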
- function: str = "extract-audio-from-video" - input_type: str = DataType.VIDEO - output_type: str = DataType.AUDIO + InputType: text + OutputType: video + """ + function: str = "text-to-video-generation" + input_type: str = DataType.TEXT + output_type: str = DataType.VIDEO - inputs_class: Type[TI] = ExtractAudioFromVideoInputs - outputs_class: Type[TO] = ExtractAudioFromVideoOutputs + inputs_class: Type[TI] = TextToVideoGenerationInputs + outputs_class: Type[TO] = TextToVideoGenerationOutputs -class ImageCompressionInputs(Inputs): +class DocumentInformationExtractionInputs(Inputs): image: InputParam = None - apl_qfactor: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) - self.apl_qfactor = self.create_param(code="apl_qfactor", data_type=DataType.TEXT, is_required=False) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) -class ImageCompressionOutputs(Outputs): - image: OutputParam = None +class DocumentInformationExtractionOutputs(Outputs): + data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class ImageCompression(AssetNode[ImageCompressionInputs, ImageCompressionOutputs]): +class DocumentInformationExtraction(AssetNode[DocumentInformationExtractionInputs, DocumentInformationExtractionOutputs]): """ - Image compression is a process that reduces the file size of an image by - removing redundant or non-essential data, while maintaining an acceptable level - of visual quality. + Document Information Extraction is the process of automatically identifying, +extracting, and structuring relevant data from unstructured or semi-structured +documents, such as invoices, receipts, contracts, and forms, to facilitate +easier data management and analysis. 
- InputType: image - OutputType: image + InputType: image + OutputType: text """ - - function: str = "image-compression" + function: str = "document-information-extraction" input_type: str = DataType.IMAGE - output_type: str = DataType.IMAGE + output_type: str = DataType.TEXT - inputs_class: Type[TI] = ImageCompressionInputs - outputs_class: Type[TO] = ImageCompressionOutputs + inputs_class: Type[TI] = DocumentInformationExtractionInputs + outputs_class: Type[TO] = DocumentInformationExtractionOutputs -class MultilingualSpeechRecognitionInputs(Inputs): - source_audio: InputParam = None - language: InputParam = None +class VideoGenerationInputs(Inputs): + text: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) -class MultilingualSpeechRecognitionOutputs(Outputs): +class VideoGenerationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.VIDEO) -class MultilingualSpeechRecognition(AssetNode[MultilingualSpeechRecognitionInputs, MultilingualSpeechRecognitionOutputs]): +class VideoGeneration(AssetNode[VideoGenerationInputs, VideoGenerationOutputs]): """ - Multilingual Speech Recognition is a technology that enables the automatic - transcription of spoken language into text across multiple languages, allowing - for seamless communication and understanding in diverse linguistic contexts. + Produces video content based on specific inputs or datasets. Can be used for +simulations, animations, or even deepfake detection. 
- InputType: audio - OutputType: text + InputType: text + OutputType: video """ + function: str = "video-generation" + input_type: str = DataType.TEXT + output_type: str = DataType.VIDEO - function: str = "multilingual-speech-recognition" - input_type: str = DataType.AUDIO - output_type: str = DataType.TEXT - - inputs_class: Type[TI] = MultilingualSpeechRecognitionInputs - outputs_class: Type[TO] = MultilingualSpeechRecognitionOutputs + inputs_class: Type[TI] = VideoGenerationInputs + outputs_class: Type[TO] = VideoGenerationOutputs -class ReferencelessTextGenerationMetricInputs(Inputs): - hypotheses: InputParam = None - sources: InputParam = None - score_identifier: InputParam = None +class TextToImageGenerationInputs(Inputs): + text: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.TEXT, is_required=True) - self.sources = self.create_param(code="sources", data_type=DataType.TEXT, is_required=False) - self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) -class ReferencelessTextGenerationMetricOutputs(Outputs): +class TextToImageGenerationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.IMAGE) -class ReferencelessTextGenerationMetric( - BaseMetric[ReferencelessTextGenerationMetricInputs, ReferencelessTextGenerationMetricOutputs] -): +class TextToImageGeneration(AssetNode[TextToImageGenerationInputs, TextToImageGenerationOutputs]): """ - The Referenceless Text Generation Metric is a method for evaluating the quality - of generated text without requiring a reference text for comparison, often - leveraging models or algorithms to assess coherence, relevance, and fluency - based on intrinsic properties of the text itself. + Creates a visual representation based on textual input, turning descriptions +into pictorial forms. Used in creative processes and content generation. 
- InputType: text - OutputType: text + InputType: text + OutputType: image """ - - function: str = "referenceless-text-generation-metric" + function: str = "text-to-image-generation" input_type: str = DataType.TEXT - output_type: str = DataType.TEXT + output_type: str = DataType.IMAGE - inputs_class: Type[TI] = ReferencelessTextGenerationMetricInputs - outputs_class: Type[TO] = ReferencelessTextGenerationMetricOutputs + inputs_class: Type[TI] = TextToImageGenerationInputs + outputs_class: Type[TO] = TextToImageGenerationOutputs -class TextGenerationMetricDefaultInputs(Inputs): +class ReferencelessTextGenerationMetricInputs(Inputs): hypotheses: InputParam = None - references: InputParam = None sources: InputParam = None score_identifier: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.TEXT, is_required=True) - self.references = self.create_param(code="references", data_type=DataType.TEXT, is_required=False) self.sources = self.create_param(code="sources", data_type=DataType.TEXT, is_required=False) self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True) -class TextGenerationMetricDefaultOutputs(Outputs): +class ReferencelessTextGenerationMetricOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -1875,153 +1928,151 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class TextGenerationMetricDefault(BaseMetric[TextGenerationMetricDefaultInputs, TextGenerationMetricDefaultOutputs]): +class ReferencelessTextGenerationMetric(BaseMetric[ReferencelessTextGenerationMetricInputs, ReferencelessTextGenerationMetricOutputs]): """ - The "Text Generation Metric Default" function provides a standard set of - evaluation metrics for assessing the quality and performance of text generation - models. + The Referenceless Text Generation Metric is a method for evaluating the quality +of generated text without requiring a reference text for comparison, often +leveraging models or algorithms to assess coherence, relevance, and fluency +based on intrinsic properties of the text itself. 
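# Illustrative sketch, not part of this patch: the referenceless metric's
# contract, assuming param objects keep their create_param arguments. Hypotheses
# and a score identifier are mandatory; sources are optional; and, unlike the
# standard text-generation metric, no reference texts are needed at all.
ins = ReferencelessTextGenerationMetricInputs()
assert ins.hypotheses.is_required and ins.score_identifier.is_required
assert not ins.sources.is_required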
- InputType: text - OutputType: text + InputType: text + OutputType: text """ - - function: str = "text-generation-metric-default" + function: str = "referenceless-text-generation-metric" input_type: str = DataType.TEXT output_type: str = DataType.TEXT - inputs_class: Type[TI] = TextGenerationMetricDefaultInputs - outputs_class: Type[TO] = TextGenerationMetricDefaultOutputs + inputs_class: Type[TI] = ReferencelessTextGenerationMetricInputs + outputs_class: Type[TO] = ReferencelessTextGenerationMetricOutputs -class NoiseRemovalInputs(Inputs): - audio: InputParam = None +class OtherMultipurposeInputs(Inputs): + text: InputParam = None + language: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=False) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) -class NoiseRemovalOutputs(Outputs): +class OtherMultipurposeOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.AUDIO) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class NoiseRemoval(AssetNode[NoiseRemovalInputs, NoiseRemovalOutputs]): +class OtherMultipurpose(AssetNode[OtherMultipurposeInputs, OtherMultipurposeOutputs]): """ - Noise Removal is a process that involves identifying and eliminating unwanted - random variations or disturbances from an audio signal to enhance the clarity - and quality of the underlying information. + The "Other (Multipurpose)" function serves as a versatile category designed to +accommodate a wide range of tasks and activities that do not fit neatly into +predefined classifications, offering flexibility and adaptability for various +needs. 
- InputType: audio - OutputType: audio + InputType: text + OutputType: text """ + function: str = "other-(multipurpose)" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT - function: str = "noise-removal" - input_type: str = DataType.AUDIO - output_type: str = DataType.AUDIO - - inputs_class: Type[TI] = NoiseRemovalInputs - outputs_class: Type[TO] = NoiseRemovalOutputs + inputs_class: Type[TI] = OtherMultipurposeInputs + outputs_class: Type[TO] = OtherMultipurposeOutputs -class AudioReconstructionInputs(Inputs): - audio: InputParam = None +class ImageLabelDetectionInputs(Inputs): + image: InputParam = None + min_confidence: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) + self.min_confidence = self.create_param(code="min_confidence", data_type=DataType.TEXT, is_required=False) -class AudioReconstructionOutputs(Outputs): +class ImageLabelDetectionOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.AUDIO) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class AudioReconstruction(BaseReconstructor[AudioReconstructionInputs, AudioReconstructionOutputs]): +class ImageLabelDetection(AssetNode[ImageLabelDetectionInputs, ImageLabelDetectionOutputs]): """ - Audio Reconstruction is the process of restoring or recreating audio signals - from incomplete, damaged, or degraded recordings to achieve a high-quality, - accurate representation of the original sound. + Identifies objects, themes, or topics within images, useful for image +categorization, search, and recommendation systems. 
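# Illustrative sketch, not part of this patch: ImageLabelDetection takes a
# required image plus an optional min_confidence threshold (typed as text in
# this interface). Assumes param objects keep their create_param arguments.
ins = ImageLabelDetectionInputs()
assert ins.image.is_required and not ins.min_confidence.is_required
assert ins.min_confidence.data_type == DataType.TEXT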
- InputType: audio - OutputType: audio + InputType: image + OutputType: label """ + function: str = "image-label-detection" + input_type: str = DataType.IMAGE + output_type: str = DataType.LABEL - function: str = "audio-reconstruction" - input_type: str = DataType.AUDIO - output_type: str = DataType.AUDIO - - inputs_class: Type[TI] = AudioReconstructionInputs - outputs_class: Type[TO] = AudioReconstructionOutputs + inputs_class: Type[TI] = ImageLabelDetectionInputs + outputs_class: Type[TO] = ImageLabelDetectionOutputs -class VoiceCloningInputs(Inputs): - text: InputParam = None - audio: InputParam = None +class SpeakerDiarizationVideoInputs(Inputs): + video: InputParam = None language: InputParam = None - dialect: InputParam = None - voice: InputParam = None script: InputParam = None - type: InputParam = None + dialect: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) - self.voice = self.create_param(code="voice", data_type=DataType.LABEL, is_required=False) + self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) - self.type = self.create_param(code="type", data_type=DataType.LABEL, is_required=False) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) -class VoiceCloningOutputs(Outputs): +class SpeakerDiarizationVideoOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.AUDIO) + self.data = self.create_param(code="data", data_type=DataType.VIDEO) -class VoiceCloning(AssetNode[VoiceCloningInputs, VoiceCloningOutputs]): +class SpeakerDiarizationVideo(AssetNode[SpeakerDiarizationVideoInputs, SpeakerDiarizationVideoOutputs]): """ - Voice cloning is a technology that uses artificial intelligence to create a - digital replica of a person's voice, allowing for the generation of speech that - mimics the tone, pitch, and speaking style of the original speaker. + Segments a video based on different speakers, identifying when each individual +speaks. Useful for transcriptions and understanding multi-person conversations. 
- InputType: text - OutputType: audio + InputType: video + OutputType: label """ + function: str = "speaker-diarization-video" + input_type: str = DataType.VIDEO + output_type: str = DataType.LABEL - function: str = "voice-cloning" - input_type: str = DataType.TEXT - output_type: str = DataType.AUDIO - - inputs_class: Type[TI] = VoiceCloningInputs - outputs_class: Type[TO] = VoiceCloningOutputs + inputs_class: Type[TI] = SpeakerDiarizationVideoInputs + outputs_class: Type[TO] = SpeakerDiarizationVideoOutputs -class DiacritizationInputs(Inputs): +class AudioTranscriptImprovementInputs(Inputs): language: InputParam = None dialect: InputParam = None + source_supplier: InputParam = None + is_medical: InputParam = None + source_audio: InputParam = None script: InputParam = None - text: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.source_supplier = self.create_param(code="source_supplier", data_type=DataType.LABEL, is_required=False) + self.is_medical = self.create_param(code="is_medical", data_type=DataType.TEXT, is_required=True) + self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) -class DiacritizationOutputs(Outputs): +class AudioTranscriptImprovementOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -2029,139 +2080,134 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class Diacritization(AssetNode[DiacritizationInputs, DiacritizationOutputs]): +class AudioTranscriptImprovement(AssetNode[AudioTranscriptImprovementInputs, AudioTranscriptImprovementOutputs]): """ - Diacritization is the process of adding diacritical marks to letters in a text - to indicate pronunciation, stress, tone, or meaning, often used in languages - such as Arabic, Hebrew, and Vietnamese to provide clarity and accuracy in - written communication. + Refines and corrects transcriptions generated from audio data, improving +readability and accuracy. 
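# Illustrative sketch, not part of this patch: AudioTranscriptImprovement makes
# is_medical a required text flag alongside the source audio, while the
# dialect/script/source_supplier hints stay optional. Assumes param objects
# expose the is_required flag they were created with.
ins = AudioTranscriptImprovementInputs()
assert ins.is_medical.is_required and ins.source_audio.is_required
assert not ins.dialect.is_required and not ins.script.is_required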
- InputType: text - OutputType: text + InputType: audio + OutputType: text """ - - function: str = "diacritization" - input_type: str = DataType.TEXT + function: str = "audio-transcript-improvement" + input_type: str = DataType.AUDIO output_type: str = DataType.TEXT - inputs_class: Type[TI] = DiacritizationInputs - outputs_class: Type[TO] = DiacritizationOutputs + inputs_class: Type[TI] = AudioTranscriptImprovementInputs + outputs_class: Type[TO] = AudioTranscriptImprovementOutputs -class AudioEmotionDetectionInputs(Inputs): +class DialectDetectionInputs(Inputs): audio: InputParam = None + language: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=False) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) -class AudioEmotionDetectionOutputs(Outputs): +class DialectDetectionOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class AudioEmotionDetection(AssetNode[AudioEmotionDetectionInputs, AudioEmotionDetectionOutputs]): +class DialectDetection(AssetNode[DialectDetectionInputs, DialectDetectionOutputs]): """ - Audio Emotion Detection is a technology that analyzes vocal characteristics and - patterns in audio recordings to identify and classify the emotional state of - the speaker. + Identifies specific dialects within a language, aiding in localized content +creation or user experience personalization. - InputType: audio - OutputType: label + InputType: audio + OutputType: text """ - - function: str = "audio-emotion-detection" + function: str = "dialect-detection" input_type: str = DataType.AUDIO - output_type: str = DataType.LABEL + output_type: str = DataType.TEXT - inputs_class: Type[TI] = AudioEmotionDetectionInputs - outputs_class: Type[TO] = AudioEmotionDetectionOutputs + inputs_class: Type[TI] = DialectDetectionInputs + outputs_class: Type[TO] = DialectDetectionOutputs -class TextSummarizationInputs(Inputs): +class SentimentAnalysisInputs(Inputs): text: InputParam = None language: InputParam = None - script: InputParam = None dialect: InputParam = None + script: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class TextSummarizationOutputs(Outputs): +class SentimentAnalysisOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class TextSummarization(AssetNode[TextSummarizationInputs, TextSummarizationOutputs]): +class SentimentAnalysis(AssetNode[SentimentAnalysisInputs, SentimentAnalysisOutputs]): """ - Text summarization is the process of condensing a large body of text into a - 
-    shorter version, capturing the main points and essential information while
-    maintaining coherence and meaning.
+    Determines the sentiment or emotion (e.g., positive, negative, neutral) of a
+piece of text, aiding in understanding user feedback or market sentiment.

-    InputType: text
-    OutputType: text
+    InputType: text
+    OutputType: label
     """
-
-    function: str = "text-summarization"
+    function: str = "sentiment-analysis"
     input_type: str = DataType.TEXT
-    output_type: str = DataType.TEXT
+    output_type: str = DataType.LABEL

-    inputs_class: Type[TI] = TextSummarizationInputs
-    outputs_class: Type[TO] = TextSummarizationOutputs
+    inputs_class: Type[TI] = SentimentAnalysisInputs
+    outputs_class: Type[TO] = SentimentAnalysisOutputs


-class EntityLinkingInputs(Inputs):
-    text: InputParam = None
+class SpeechEmbeddingInputs(Inputs):
+    audio: InputParam = None
     language: InputParam = None
-    domain: InputParam = None
+    dialect: InputParam = None
+    script: InputParam = None

     def __init__(self, node=None):
         super().__init__(node=node)
-        self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True)
+        self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True)
         self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True)
-        self.domain = self.create_param(code="domain", data_type=DataType.LABEL, is_required=False)
+        self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False)
+        self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False)


-class EntityLinkingOutputs(Outputs):
+class SpeechEmbeddingOutputs(Outputs):
     data: OutputParam = None

     def __init__(self, node=None):
         super().__init__(node=node)
-        self.data = self.create_param(code="data", data_type=DataType.LABEL)
+        self.data = self.create_param(code="data", data_type=DataType.TEXT)


-class EntityLinking(AssetNode[EntityLinkingInputs, EntityLinkingOutputs]):
+class SpeechEmbedding(AssetNode[SpeechEmbeddingInputs, SpeechEmbeddingOutputs]):
     """
-    Entity Linking is the process of identifying and connecting mentions of
-    entities within a text to their corresponding entries in a structured knowledge
-    base, thereby enabling the disambiguation of terms and enhancing the
-    understanding of the text's context.
+    Transforms spoken content into a fixed-size vector in a high-dimensional space
+that captures the content's essence. Facilitates tasks like speech recognition
+and speaker verification.

-    InputType: text
-    OutputType: label
+    InputType: audio
+    OutputType: text
     """
+    function: str = "speech-embedding"
+    input_type: str = DataType.AUDIO
+    output_type: str = DataType.TEXT

-    function: str = "entity-linking"
-    input_type: str = DataType.TEXT
-    output_type: str = DataType.LABEL
-
-    inputs_class: Type[TI] = EntityLinkingInputs
-    outputs_class: Type[TO] = EntityLinkingOutputs
+    inputs_class: Type[TI] = SpeechEmbeddingInputs
+    outputs_class: Type[TO] = SpeechEmbeddingOutputs


-class TextGenerationMetricInputs(Inputs):
+class TextGenerationMetricDefaultInputs(Inputs):
     hypotheses: InputParam = None
     references: InputParam = None
     sources: InputParam = None
@@ -2175,7 +2221,7 @@ def __init__(self, node=None):
         self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True)


-class TextGenerationMetricOutputs(Outputs):
+class TextGenerationMetricDefaultOutputs(Outputs):
     data: OutputParam = None

     def __init__(self, node=None):
@@ -2183,75 +2229,72 @@ def __init__(self, node=None):
         self.data = self.create_param(code="data", data_type=DataType.TEXT)


-class TextGenerationMetric(BaseMetric[TextGenerationMetricInputs, TextGenerationMetricOutputs]):
+class TextGenerationMetricDefault(BaseMetric[TextGenerationMetricDefaultInputs, TextGenerationMetricDefaultOutputs]):
     """
-    A Text Generation Metric is a quantitative measure used to evaluate the quality
-    and effectiveness of text produced by natural language processing models, often
-    assessing aspects such as coherence, relevance, fluency, and adherence to given
-    prompts or instructions.
+    The "Text Generation Metric Default" function provides a standard set of
+evaluation metrics for assessing the quality and performance of text generation
+models.

-    InputType: text
-    OutputType: text
+    InputType: text
+    OutputType: text
     """
-
-    function: str = "text-generation-metric"
+    function: str = "text-generation-metric-default"
     input_type: str = DataType.TEXT
     output_type: str = DataType.TEXT

-    inputs_class: Type[TI] = TextGenerationMetricInputs
-    outputs_class: Type[TO] = TextGenerationMetricOutputs
+    inputs_class: Type[TI] = TextGenerationMetricDefaultInputs
+    outputs_class: Type[TO] = TextGenerationMetricDefaultOutputs


-class SplitOnLinebreakInputs(Inputs):
-    text: InputParam = None
+class AudioGenerationMetricInputs(Inputs):
+    hypotheses: InputParam = None
+    references: InputParam = None
+    sources: InputParam = None
+    score_identifier: InputParam = None

     def __init__(self, node=None):
         super().__init__(node=node)
-        self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True)
+        self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.AUDIO, is_required=True)
+        self.references = self.create_param(code="references", data_type=DataType.AUDIO, is_required=False)
+        self.sources = self.create_param(code="sources", data_type=DataType.TEXT, is_required=False)
+        self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True)


-class SplitOnLinebreakOutputs(Outputs):
+class AudioGenerationMetricOutputs(Outputs):
     data: OutputParam = None
-    audio: OutputParam = None

     def __init__(self, node=None):
         super().__init__(node=node)
         self.data = self.create_param(code="data", data_type=DataType.TEXT)
-        self.audio = self.create_param(code="audio", data_type=DataType.AUDIO)


-class SplitOnLinebreak(BaseSegmentor[SplitOnLinebreakInputs, SplitOnLinebreakOutputs]):
+class AudioGenerationMetric(BaseMetric[AudioGenerationMetricInputs, AudioGenerationMetricOutputs]):
     """
-    The "Split On Linebreak" function divides a given string into a list of
-    substrings, using linebreaks (newline characters) as the points of separation.
+    The Audio Generation Metric is a quantitative measure used to evaluate the
+quality, accuracy, and overall performance of audio generated by artificial
+intelligence systems, often considering factors such as fidelity,
+intelligibility, and similarity to human-produced audio.

-    InputType: text
-    OutputType: text
+    InputType: text
+    OutputType: text
     """
-
-    function: str = "split-on-linebreak"
+    function: str = "audio-generation-metric"
     input_type: str = DataType.TEXT
     output_type: str = DataType.TEXT

-    inputs_class: Type[TI] = SplitOnLinebreakInputs
-    outputs_class: Type[TO] = SplitOnLinebreakOutputs
+    inputs_class: Type[TI] = AudioGenerationMetricInputs
+    outputs_class: Type[TO] = AudioGenerationMetricOutputs


-class SentimentAnalysisInputs(Inputs):
-    text: InputParam = None
-    language: InputParam = None
-    dialect: InputParam = None
-    script: InputParam = None
+class AudioLanguageIdentificationInputs(Inputs):
+    audio: InputParam = None

     def __init__(self, node=None):
         super().__init__(node=node)
-        self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True)
-        self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True)
-        self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False)
-        self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False)
+        self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True)


-class SentimentAnalysisOutputs(Outputs):
+class AudioLanguageIdentificationOutputs(Outputs):
     data: OutputParam = None

     def __init__(self, node=None):
@@ -2259,34 +2302,33 @@ def __init__(self, node=None):
         self.data = self.create_param(code="data", data_type=DataType.LABEL)


-class SentimentAnalysis(AssetNode[SentimentAnalysisInputs, SentimentAnalysisOutputs]):
+class AudioLanguageIdentification(AssetNode[AudioLanguageIdentificationInputs, AudioLanguageIdentificationOutputs]):
     """
-    Sentiment Analysis is a natural language processing technique used to determine
-    and classify the emotional tone or subjective information expressed in a piece
-    of text, such as identifying whether the sentiment is positive, negative, or
-    neutral.
+    Audio Language Identification is a process that involves analyzing an audio
+recording to determine the language being spoken.

- InputType: text - OutputType: label + InputType: audio + OutputType: label """ - - function: str = "sentiment-analysis" - input_type: str = DataType.TEXT + function: str = "audio-language-identification" + input_type: str = DataType.AUDIO output_type: str = DataType.LABEL - inputs_class: Type[TI] = SentimentAnalysisInputs - outputs_class: Type[TO] = SentimentAnalysisOutputs + inputs_class: Type[TI] = AudioLanguageIdentificationInputs + outputs_class: Type[TO] = AudioLanguageIdentificationOutputs -class KeywordSpottingInputs(Inputs): - audio: InputParam = None +class VideoLabelDetectionInputs(Inputs): + video: InputParam = None + min_confidence: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=False) + self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=True) + self.min_confidence = self.create_param(code="min_confidence", data_type=DataType.TEXT, is_required=False) -class KeywordSpottingOutputs(Outputs): +class VideoLabelDetectionOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -2294,39 +2336,37 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class KeywordSpotting(AssetNode[KeywordSpottingInputs, KeywordSpottingOutputs]): +class VideoLabelDetection(AssetNode[VideoLabelDetectionInputs, VideoLabelDetectionOutputs]): """ - Keyword Spotting is a function that enables the detection and identification of - specific words or phrases within a stream of audio, often used in voice- - activated systems to trigger actions or commands based on recognized keywords. + Identifies and tags objects, scenes, or activities within a video. Useful for +content indexing and recommendation systems. 
- InputType: audio - OutputType: label + InputType: video + OutputType: label """ - - function: str = "keyword-spotting" - input_type: str = DataType.AUDIO + function: str = "video-label-detection" + input_type: str = DataType.VIDEO output_type: str = DataType.LABEL - inputs_class: Type[TI] = KeywordSpottingInputs - outputs_class: Type[TO] = KeywordSpottingOutputs + inputs_class: Type[TI] = VideoLabelDetectionInputs + outputs_class: Type[TO] = VideoLabelDetectionOutputs -class TextClassificationInputs(Inputs): +class TopicClassificationInputs(Inputs): text: InputParam = None language: InputParam = None - dialect: InputParam = None script: InputParam = None + dialect: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) -class TextClassificationOutputs(Outputs): +class TopicClassificationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -2334,36 +2374,35 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class TextClassification(AssetNode[TextClassificationInputs, TextClassificationOutputs]): +class TopicClassification(AssetNode[TopicClassificationInputs, TopicClassificationOutputs]): """ - Text Classification is a natural language processing task that involves - categorizing text into predefined labels or classes based on its content, - enabling automated organization, filtering, and analysis of large volumes of - textual data. + Assigns categories or topics to a piece of text based on its content, +facilitating content organization and retrieval. 
- InputType: text - OutputType: label + InputType: text + OutputType: label """ - - function: str = "text-classification" + function: str = "topic-classification" input_type: str = DataType.TEXT output_type: str = DataType.LABEL - inputs_class: Type[TI] = TextClassificationInputs - outputs_class: Type[TO] = TextClassificationOutputs + inputs_class: Type[TI] = TopicClassificationInputs + outputs_class: Type[TO] = TopicClassificationOutputs -class OtherMultipurposeInputs(Inputs): - text: InputParam = None - language: InputParam = None +class ReferencelessTextGenerationMetricDefaultInputs(Inputs): + hypotheses: InputParam = None + sources: InputParam = None + score_identifier: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.TEXT, is_required=True) + self.sources = self.create_param(code="sources", data_type=DataType.TEXT, is_required=False) + self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True) -class OtherMultipurposeOutputs(Outputs): +class ReferencelessTextGenerationMetricDefaultOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -2371,80 +2410,99 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class OtherMultipurpose(AssetNode[OtherMultipurposeInputs, OtherMultipurposeOutputs]): +class ReferencelessTextGenerationMetricDefault(BaseMetric[ReferencelessTextGenerationMetricDefaultInputs, ReferencelessTextGenerationMetricDefaultOutputs]): """ - The "Other (Multipurpose)" function serves as a versatile category designed to - accommodate a wide range of tasks and activities that do not fit neatly into - predefined classifications, offering flexibility and adaptability for various - needs. + The Referenceless Text Generation Metric Default is a function designed to +evaluate the quality of generated text without relying on reference texts for +comparison. 
- InputType: text - OutputType: text + InputType: text + OutputType: text """ - - function: str = "other-(multipurpose)" + function: str = "referenceless-text-generation-metric-default" input_type: str = DataType.TEXT output_type: str = DataType.TEXT - inputs_class: Type[TI] = OtherMultipurposeInputs - outputs_class: Type[TO] = OtherMultipurposeOutputs + inputs_class: Type[TI] = ReferencelessTextGenerationMetricDefaultInputs + outputs_class: Type[TO] = ReferencelessTextGenerationMetricDefaultOutputs -class SpeechSynthesisInputs(Inputs): - audio: InputParam = None - language: InputParam = None - dialect: InputParam = None - voice: InputParam = None - script: InputParam = None - text: InputParam = None - type: InputParam = None +class ImageContentModerationInputs(Inputs): + image: InputParam = None + min_confidence: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=False) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) - self.voice = self.create_param(code="voice", data_type=DataType.LABEL, is_required=False) - self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - self.type = self.create_param(code="type", data_type=DataType.LABEL, is_required=False) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) + self.min_confidence = self.create_param(code="min_confidence", data_type=DataType.TEXT, is_required=False) -class SpeechSynthesisOutputs(Outputs): +class ImageContentModerationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.AUDIO) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class SpeechSynthesis(AssetNode[SpeechSynthesisInputs, SpeechSynthesisOutputs]): +class ImageContentModeration(AssetNode[ImageContentModerationInputs, ImageContentModerationOutputs]): """ - Speech synthesis is the artificial production of human speech, typically - achieved through software or hardware systems that convert text into spoken - words, enabling machines to communicate verbally with users. + Detects and filters out inappropriate or harmful images, essential for +platforms with user-generated visual content. 
- InputType: text - OutputType: audio + InputType: image + OutputType: label """ + function: str = "image-content-moderation" + input_type: str = DataType.IMAGE + output_type: str = DataType.LABEL - function: str = "speech-synthesis" - input_type: str = DataType.TEXT - output_type: str = DataType.AUDIO + inputs_class: Type[TI] = ImageContentModerationInputs + outputs_class: Type[TO] = ImageContentModerationOutputs - inputs_class: Type[TI] = SpeechSynthesisInputs - outputs_class: Type[TO] = SpeechSynthesisOutputs +class AsrAgeClassificationInputs(Inputs): + source_audio: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) -class AudioIntentDetectionInputs(Inputs): - audio: InputParam = None + +class AsrAgeClassificationOutputs(Outputs): + data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=False) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class AudioIntentDetectionOutputs(Outputs): +class AsrAgeClassification(AssetNode[AsrAgeClassificationInputs, AsrAgeClassificationOutputs]): + """ + The ASR Age Classification function is designed to analyze audio recordings of +speech to determine the speaker's age group by leveraging automatic speech +recognition (ASR) technology and machine learning algorithms. + + InputType: audio + OutputType: label + """ + function: str = "asr-age-classification" + input_type: str = DataType.AUDIO + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = AsrAgeClassificationInputs + outputs_class: Type[TO] = AsrAgeClassificationOutputs + + +class AsrGenderClassificationInputs(Inputs): + source_audio: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) + + +class AsrGenderClassificationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -2452,36 +2510,66 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class AudioIntentDetection(AssetNode[AudioIntentDetectionInputs, AudioIntentDetectionOutputs]): +class AsrGenderClassification(AssetNode[AsrGenderClassificationInputs, AsrGenderClassificationOutputs]): """ - Audio Intent Detection is a process that involves analyzing audio signals to - identify and interpret the underlying intentions or purposes behind spoken - words, enabling systems to understand and respond appropriately to human - speech. + The ASR Gender Classification function analyzes audio recordings to determine +and classify the speaker's gender based on their voice characteristics. 
+ + InputType: audio + OutputType: label + """ + function: str = "asr-gender-classification" + input_type: str = DataType.AUDIO + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = AsrGenderClassificationInputs + outputs_class: Type[TO] = AsrGenderClassificationOutputs + + +class BaseModelInputs(Inputs): + language: InputParam = None + text: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + + +class BaseModelOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) - InputType: audio - OutputType: label + +class BaseModel(AssetNode[BaseModelInputs, BaseModelOutputs]): """ + The Base-Model function serves as a foundational framework designed to provide +essential features and capabilities upon which more specialized or advanced +models can be built and customized. - function: str = "audio-intent-detection" - input_type: str = DataType.AUDIO - output_type: str = DataType.LABEL + InputType: text + OutputType: text + """ + function: str = "base-model" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT - inputs_class: Type[TI] = AudioIntentDetectionInputs - outputs_class: Type[TO] = AudioIntentDetectionOutputs + inputs_class: Type[TI] = BaseModelInputs + outputs_class: Type[TO] = BaseModelOutputs -class VideoLabelDetectionInputs(Inputs): - video: InputParam = None - min_confidence: InputParam = None +class LanguageIdentificationAudioInputs(Inputs): + audio: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=True) - self.min_confidence = self.create_param(code="min_confidence", data_type=DataType.TEXT, is_required=False) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) -class VideoLabelDetectionOutputs(Outputs): +class LanguageIdentificationAudioOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -2489,36 +2577,31 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class VideoLabelDetection(AssetNode[VideoLabelDetectionInputs, VideoLabelDetectionOutputs]): +class LanguageIdentificationAudio(AssetNode[LanguageIdentificationAudioInputs, LanguageIdentificationAudioOutputs]): """ - Video Label Detection is a function that automatically identifies and tags - various objects, scenes, activities, and other relevant elements within a - video, providing descriptive labels that enhance searchability and content - organization. + The Language Identification Audio function analyzes audio input to determine +and identify the language being spoken. 
- InputType: video - OutputType: label + InputType: audio + OutputType: label """ - - function: str = "video-label-detection" - input_type: str = DataType.VIDEO + function: str = "language-identification-audio" + input_type: str = DataType.AUDIO output_type: str = DataType.LABEL - inputs_class: Type[TI] = VideoLabelDetectionInputs - outputs_class: Type[TO] = VideoLabelDetectionOutputs + inputs_class: Type[TI] = LanguageIdentificationAudioInputs + outputs_class: Type[TO] = LanguageIdentificationAudioOutputs -class AsrQualityEstimationInputs(Inputs): - text: InputParam = None - script: InputParam = None +class MultiClassImageClassificationInputs(Inputs): + image: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) -class AsrQualityEstimationOutputs(Outputs): +class MultiClassImageClassificationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -2526,103 +2609,91 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class AsrQualityEstimation(AssetNode[AsrQualityEstimationInputs, AsrQualityEstimationOutputs]): +class MultiClassImageClassification(AssetNode[MultiClassImageClassificationInputs, MultiClassImageClassificationOutputs]): """ - ASR Quality Estimation is a process that evaluates the accuracy and reliability - of automatic speech recognition systems by analyzing their performance in - transcribing spoken language into text. + Multi Class Image Classification is a machine learning task where an algorithm +is trained to categorize images into one of several predefined classes or +categories based on their visual content. 
- InputType: text - OutputType: label + InputType: image + OutputType: label """ - - function: str = "asr-quality-estimation" - input_type: str = DataType.TEXT + function: str = "multi-class-image-classification" + input_type: str = DataType.IMAGE output_type: str = DataType.LABEL - inputs_class: Type[TI] = AsrQualityEstimationInputs - outputs_class: Type[TO] = AsrQualityEstimationOutputs + inputs_class: Type[TI] = MultiClassImageClassificationInputs + outputs_class: Type[TO] = MultiClassImageClassificationOutputs -class AudioTranscriptAnalysisInputs(Inputs): - language: InputParam = None - dialect: InputParam = None - source_supplier: InputParam = None - source_audio: InputParam = None - script: InputParam = None +class SemanticSegmentationInputs(Inputs): + image: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) - self.source_supplier = self.create_param(code="source_supplier", data_type=DataType.LABEL, is_required=False) - self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) - self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) -class AudioTranscriptAnalysisOutputs(Outputs): +class SemanticSegmentationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class AudioTranscriptAnalysis(AssetNode[AudioTranscriptAnalysisInputs, AudioTranscriptAnalysisOutputs]): +class SemanticSegmentation(AssetNode[SemanticSegmentationInputs, SemanticSegmentationOutputs]): """ - Audio Transcript Analysis is a process that involves converting spoken language - from audio recordings into written text, followed by examining and interpreting - the transcribed content to extract meaningful insights, identify patterns, and - derive actionable information. + Semantic segmentation is a computer vision process that involves classifying +each pixel in an image into a predefined category, effectively partitioning the +image into meaningful segments based on the objects or regions they represent. 
- InputType: audio - OutputType: text + InputType: image + OutputType: label """ + function: str = "semantic-segmentation" + input_type: str = DataType.IMAGE + output_type: str = DataType.LABEL - function: str = "audio-transcript-analysis" - input_type: str = DataType.AUDIO - output_type: str = DataType.TEXT - - inputs_class: Type[TI] = AudioTranscriptAnalysisInputs - outputs_class: Type[TO] = AudioTranscriptAnalysisOutputs + inputs_class: Type[TI] = SemanticSegmentationInputs + outputs_class: Type[TO] = SemanticSegmentationOutputs -class SearchInputs(Inputs): - text: InputParam = None +class InstanceSegmentationInputs(Inputs): + image: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) -class SearchOutputs(Outputs): +class InstanceSegmentationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class Search(AssetNode[SearchInputs, SearchOutputs]): +class InstanceSegmentation(AssetNode[InstanceSegmentationInputs, InstanceSegmentationOutputs]): """ - The "Search" function allows users to input keywords or phrases to quickly - locate specific information, files, or content within a database, website, or - application. + Instance segmentation is a computer vision task that involves detecting and +delineating each distinct object within an image, assigning a unique label and +precise boundary to every individual instance of objects, even if they belong +to the same category. 
- InputType: text - OutputType: text + InputType: image + OutputType: label """ + function: str = "instance-segmentation" + input_type: str = DataType.IMAGE + output_type: str = DataType.LABEL - function: str = "search" - input_type: str = DataType.TEXT - output_type: str = DataType.TEXT - - inputs_class: Type[TI] = SearchInputs - outputs_class: Type[TO] = SearchOutputs + inputs_class: Type[TI] = InstanceSegmentationInputs + outputs_class: Type[TO] = InstanceSegmentationOutputs -class VideoForcedAlignmentInputs(Inputs): - video: InputParam = None +class EmotionDetectionInputs(Inputs): text: InputParam = None language: InputParam = None dialect: InputParam = None @@ -2630,42 +2701,37 @@ class VideoForcedAlignmentInputs(Inputs): def __init__(self, node=None): super().__init__(node=node) - self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=True) self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class VideoForcedAlignmentOutputs(Outputs): - text: OutputParam = None - video: OutputParam = None +class EmotionDetectionOutputs(Outputs): + data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT) - self.video = self.create_param(code="video", data_type=DataType.VIDEO) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class VideoForcedAlignment(AssetNode[VideoForcedAlignmentInputs, VideoForcedAlignmentOutputs]): +class EmotionDetection(AssetNode[EmotionDetectionInputs, EmotionDetectionOutputs]): """ - Video Forced Alignment is a process that synchronizes video footage with - corresponding audio tracks by precisely aligning the visual and auditory - elements, ensuring that the movements of speakers' lips match the spoken words. + Identifies human emotions from text or audio, enhancing user experience in +chatbots or customer feedback analysis. 

-    InputType: video
-    OutputType: video
+    InputType: text
+    OutputType: label
     """
+    function: str = "emotion-detection"
+    input_type: str = DataType.TEXT
+    output_type: str = DataType.LABEL

-    function: str = "video-forced-alignment"
-    input_type: str = DataType.VIDEO
-    output_type: str = DataType.VIDEO
-
-    inputs_class: Type[TI] = VideoForcedAlignmentInputs
-    outputs_class: Type[TO] = VideoForcedAlignmentOutputs
+    inputs_class: Type[TI] = EmotionDetectionInputs
+    outputs_class: Type[TO] = EmotionDetectionOutputs


-class VisemeGenerationInputs(Inputs):
+class TextSpamDetectionInputs(Inputs):
     text: InputParam = None
     language: InputParam = None
     dialect: InputParam = None
@@ -2679,7 +2745,7 @@ def __init__(self, node=None):
         self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False)


-class VisemeGenerationOutputs(Outputs):
+class TextSpamDetectionOutputs(Outputs):
     data: OutputParam = None

     def __init__(self, node=None):
@@ -2687,39 +2753,40 @@ def __init__(self, node=None):
         self.data = self.create_param(code="data", data_type=DataType.LABEL)


-class VisemeGeneration(AssetNode[VisemeGenerationInputs, VisemeGenerationOutputs]):
+class TextSpamDetection(AssetNode[TextSpamDetectionInputs, TextSpamDetectionOutputs]):
     """
-    Viseme Generation is the process of creating visual representations of
-    phonemes, which are the distinct units of sound in speech, to synchronize lip
-    movements with spoken words in animations or virtual avatars.
+    Identifies and filters out unwanted or irrelevant text content, ideal for
+moderating user-generated content or ensuring quality in communication
+platforms.

-    InputType: text
-    OutputType: label
+    InputType: text
+    OutputType: label
     """
-
-    function: str = "viseme-generation"
+    function: str = "text-spam-detection"
     input_type: str = DataType.TEXT
     output_type: str = DataType.LABEL

-    inputs_class: Type[TI] = VisemeGenerationInputs
-    outputs_class: Type[TO] = VisemeGenerationOutputs
+    inputs_class: Type[TI] = TextSpamDetectionInputs
+    outputs_class: Type[TO] = TextSpamDetectionOutputs


-class TopicClassificationInputs(Inputs):
+class TextDenormalizationInputs(Inputs):
     text: InputParam = None
     language: InputParam = None
-    script: InputParam = None
-    dialect: InputParam = None
+    lowercase_latin: InputParam = None
+    remove_accents: InputParam = None
+    remove_punctuation: InputParam = None

     def __init__(self, node=None):
         super().__init__(node=node)
         self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True)
         self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True)
-        self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False)
-        self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False)
+        self.lowercase_latin = self.create_param(code="lowercase_latin", data_type=DataType.TEXT, is_required=False)
+        self.remove_accents = self.create_param(code="remove_accents", data_type=DataType.TEXT, is_required=False)
+        self.remove_punctuation = self.create_param(code="remove_punctuation", data_type=DataType.TEXT, is_required=False)


-class TopicClassificationOutputs(Outputs):
+class TextDenormalizationOutputs(Outputs):
     data: OutputParam = None

     def __init__(self, node=None):
@@ -2727,211 +2794,192 @@ def __init__(self, node=None):
         self.data = self.create_param(code="data", data_type=DataType.LABEL)


-class TopicClassification(AssetNode[TopicClassificationInputs, TopicClassificationOutputs]):
+class TextDenormalization(AssetNode[TextDenormalizationInputs, TextDenormalizationOutputs]):
     """
-    Topic Classification is a natural language processing function that categorizes
-    text into predefined topics or subjects based on its content, enabling
-    efficient organization and retrieval of information.
+    Converts standardized or normalized text into its original, often more
+readable, form. Useful in natural language generation tasks.

-    InputType: text
-    OutputType: label
+    InputType: text
+    OutputType: label
     """
-
-    function: str = "topic-classification"
+    function: str = "text-denormalization"
     input_type: str = DataType.TEXT
     output_type: str = DataType.LABEL

-    inputs_class: Type[TI] = TopicClassificationInputs
-    outputs_class: Type[TO] = TopicClassificationOutputs
+    inputs_class: Type[TI] = TextDenormalizationInputs
+    outputs_class: Type[TO] = TextDenormalizationOutputs


-class OffensiveLanguageIdentificationInputs(Inputs):
-    text: InputParam = None
-    language: InputParam = None
-    dialect: InputParam = None
-    script: InputParam = None
+class ReferencelessAudioGenerationMetricInputs(Inputs):
+    hypotheses: InputParam = None
+    sources: InputParam = None
+    score_identifier: InputParam = None

     def __init__(self, node=None):
         super().__init__(node=node)
-        self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True)
-        self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True)
-        self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False)
-        self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False)
+        self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.AUDIO, is_required=True)
+        self.sources = self.create_param(code="sources", data_type=DataType.AUDIO, is_required=False)
+        self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True)


-class OffensiveLanguageIdentificationOutputs(Outputs):
+class ReferencelessAudioGenerationMetricOutputs(Outputs):
     data: OutputParam = None

     def __init__(self, node=None):
         super().__init__(node=node)
-        self.data = self.create_param(code="data", data_type=DataType.LABEL)
+        self.data = self.create_param(code="data", data_type=DataType.TEXT)


-class OffensiveLanguageIdentification(AssetNode[OffensiveLanguageIdentificationInputs, OffensiveLanguageIdentificationOutputs]):
+class ReferencelessAudioGenerationMetric(BaseMetric[ReferencelessAudioGenerationMetricInputs, ReferencelessAudioGenerationMetricOutputs]):
     """
-    Offensive Language Identification is a function that analyzes text to detect
-    and flag language that is abusive, harmful, or inappropriate, helping to
-    maintain a respectful and safe communication environment.
+    The Referenceless Audio Generation Metric is a tool designed to evaluate the
+quality of generated audio content without the need for a reference or original
+audio sample for comparison.

- InputType: text - OutputType: label + InputType: text + OutputType: text """ - - function: str = "offensive-language-identification" + function: str = "referenceless-audio-generation-metric" input_type: str = DataType.TEXT - output_type: str = DataType.LABEL + output_type: str = DataType.TEXT - inputs_class: Type[TI] = OffensiveLanguageIdentificationInputs - outputs_class: Type[TO] = OffensiveLanguageIdentificationOutputs + inputs_class: Type[TI] = ReferencelessAudioGenerationMetricInputs + outputs_class: Type[TO] = ReferencelessAudioGenerationMetricOutputs -class SpeechTranslationInputs(Inputs): - source_audio: InputParam = None - sourcelanguage: InputParam = None - targetlanguage: InputParam = None +class AudioForcedAlignmentInputs(Inputs): + audio: InputParam = None + text: InputParam = None + language: InputParam = None dialect: InputParam = None - voice: InputParam = None script: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) - self.sourcelanguage = self.create_param(code="sourcelanguage", data_type=DataType.LABEL, is_required=True) - self.targetlanguage = self.create_param(code="targetlanguage", data_type=DataType.LABEL, is_required=True) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) - self.voice = self.create_param(code="voice", data_type=DataType.LABEL, is_required=False) self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class SpeechTranslationOutputs(Outputs): - data: OutputParam = None +class AudioForcedAlignmentOutputs(Outputs): + text: OutputParam = None + audio: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.text = self.create_param(code="text", data_type=DataType.TEXT) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO) -class SpeechTranslation(AssetNode[SpeechTranslationInputs, SpeechTranslationOutputs]): +class AudioForcedAlignment(AssetNode[AudioForcedAlignmentInputs, AudioForcedAlignmentOutputs]): """ - Speech Translation is a technology that converts spoken language in real-time - from one language to another, enabling seamless communication between speakers - of different languages. + Synchronizes phonetic and phonological text with the corresponding segments in +an audio file. Useful in linguistic research and detailed transcription tasks. 
- InputType: audio - OutputType: text + InputType: audio + OutputType: audio """ - - function: str = "speech-translation" + function: str = "audio-forced-alignment" input_type: str = DataType.AUDIO - output_type: str = DataType.TEXT + output_type: str = DataType.AUDIO - inputs_class: Type[TI] = SpeechTranslationInputs - outputs_class: Type[TO] = SpeechTranslationOutputs + inputs_class: Type[TI] = AudioForcedAlignmentInputs + outputs_class: Type[TO] = AudioForcedAlignmentOutputs -class SpeakerDiarizationAudioInputs(Inputs): - audio: InputParam = None +class VideoForcedAlignmentInputs(Inputs): + video: InputParam = None + text: InputParam = None language: InputParam = None - script: InputParam = None dialect: InputParam = None + script: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) - self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class SpeakerDiarizationAudioOutputs(Outputs): - data: OutputParam = None - audio: OutputParam = None - +class VideoForcedAlignmentOutputs(Outputs): + text: OutputParam = None + video: OutputParam = None + def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO) + self.text = self.create_param(code="text", data_type=DataType.TEXT) + self.video = self.create_param(code="video", data_type=DataType.VIDEO) -class SpeakerDiarizationAudio(BaseSegmentor[SpeakerDiarizationAudioInputs, SpeakerDiarizationAudioOutputs]): +class VideoForcedAlignment(AssetNode[VideoForcedAlignmentInputs, VideoForcedAlignmentOutputs]): """ - Speaker Diarization Audio is a process that involves segmenting an audio - recording into distinct sections, each corresponding to a different speaker, in - order to identify and differentiate between multiple speakers within the same - audio stream. + Aligns the transcription of spoken content in a video with its corresponding +timecodes, facilitating subtitle creation. 
- InputType: audio - OutputType: label + InputType: video + OutputType: video """ + function: str = "video-forced-alignment" + input_type: str = DataType.VIDEO + output_type: str = DataType.VIDEO - function: str = "speaker-diarization-audio" - input_type: str = DataType.AUDIO - output_type: str = DataType.LABEL - - inputs_class: Type[TI] = SpeakerDiarizationAudioInputs - outputs_class: Type[TO] = SpeakerDiarizationAudioOutputs + inputs_class: Type[TI] = VideoForcedAlignmentInputs + outputs_class: Type[TO] = VideoForcedAlignmentOutputs -class AudioTranscriptImprovementInputs(Inputs): - language: InputParam = None - dialect: InputParam = None - source_supplier: InputParam = None - is_medical: InputParam = None - source_audio: InputParam = None - script: InputParam = None +class ClassificationMetricInputs(Inputs): + hypotheses: InputParam = None + references: InputParam = None + lowerIsBetter: InputParam = None + sources: InputParam = None + score_identifier: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) - self.source_supplier = self.create_param(code="source_supplier", data_type=DataType.LABEL, is_required=False) - self.is_medical = self.create_param(code="is_medical", data_type=DataType.TEXT, is_required=True) - self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) - self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.LABEL, is_required=True) + self.references = self.create_param(code="references", data_type=DataType.LABEL, is_required=True) + self.lowerIsBetter = self.create_param(code="lowerIsBetter", data_type=DataType.TEXT, is_required=False) + self.sources = self.create_param(code="sources", data_type=DataType.TEXT, is_required=False) + self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True) -class AudioTranscriptImprovementOutputs(Outputs): +class ClassificationMetricOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.NUMBER) -class AudioTranscriptImprovement(AssetNode[AudioTranscriptImprovementInputs, AudioTranscriptImprovementOutputs]): +class ClassificationMetric(BaseMetric[ClassificationMetricInputs, ClassificationMetricOutputs]): """ - Audio Transcript Improvement is a function that enhances the accuracy and - clarity of transcribed audio recordings by correcting errors, refining - language, and ensuring the text faithfully represents the original spoken - content. + A Classification Metric is a quantitative measure used to evaluate the quality +and effectiveness of classification models. 
- InputType: audio - OutputType: text + InputType: text + OutputType: text """ - - function: str = "audio-transcript-improvement" - input_type: str = DataType.AUDIO + function: str = "classification-metric" + input_type: str = DataType.TEXT output_type: str = DataType.TEXT - inputs_class: Type[TI] = AudioTranscriptImprovementInputs - outputs_class: Type[TO] = AudioTranscriptImprovementOutputs + inputs_class: Type[TI] = ClassificationMetricInputs + outputs_class: Type[TO] = ClassificationMetricOutputs -class SpeechNonSpeechClassificationInputs(Inputs): - audio: InputParam = None - language: InputParam = None - script: InputParam = None - dialect: InputParam = None +class AutoMaskGenerationInputs(Inputs): + image: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) - self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) -class SpeechNonSpeechClassificationOutputs(Outputs): +class AutoMaskGenerationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -2939,42 +2987,40 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class SpeechNonSpeechClassification(AssetNode[SpeechNonSpeechClassificationInputs, SpeechNonSpeechClassificationOutputs]): +class AutoMaskGeneration(AssetNode[AutoMaskGenerationInputs, AutoMaskGenerationOutputs]): """ - The function "Speech or Non-Speech Classification" is designed to analyze audio - input and determine whether the sound is human speech or non-speech noise, - enabling applications such as voice recognition systems to filter out - irrelevant background sounds. + Auto-mask generation refers to the automated process of creating masks in image +processing or computer vision, typically for segmentation tasks. A mask is a +binary or multi-class image that labels different parts of an image, usually +separating the foreground (objects of interest) from the background, or +identifying specific object classes in an image. 
- InputType: audio - OutputType: label + InputType: image + OutputType: label """ - - function: str = "speech-non-speech-classification" - input_type: str = DataType.AUDIO + function: str = "auto-mask-generation" + input_type: str = DataType.IMAGE output_type: str = DataType.LABEL - inputs_class: Type[TI] = SpeechNonSpeechClassificationInputs - outputs_class: Type[TO] = SpeechNonSpeechClassificationOutputs + inputs_class: Type[TI] = AutoMaskGenerationInputs + outputs_class: Type[TO] = AutoMaskGenerationOutputs -class TextDenormalizationInputs(Inputs): +class TextEmbeddingInputs(Inputs): text: InputParam = None language: InputParam = None - lowercase_latin: InputParam = None - remove_accents: InputParam = None - remove_punctuation: InputParam = None + dialect: InputParam = None + script: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.lowercase_latin = self.create_param(code="lowercase_latin", data_type=DataType.TEXT, is_required=False) - self.remove_accents = self.create_param(code="remove_accents", data_type=DataType.TEXT, is_required=False) - self.remove_punctuation = self.create_param(code="remove_punctuation", data_type=DataType.TEXT, is_required=False) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class TextDenormalizationOutputs(Outputs): +class TextEmbeddingOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -2982,35 +3028,35 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class TextDenormalization(AssetNode[TextDenormalizationInputs, TextDenormalizationOutputs]): +class TextEmbedding(AssetNode[TextEmbeddingInputs, TextEmbeddingOutputs]): """ - Text Denormalization is the process of converting abbreviated, contracted, or - otherwise simplified text into its full, standard form, often to improve - readability and ensure consistency in natural language processing tasks. + Text embedding is a process that converts text into numerical vectors, +capturing the semantic meaning and contextual relationships of words or +phrases, enabling machines to understand and analyze natural language more +effectively. 
- InputType: text - OutputType: label + InputType: text + OutputType: text """ - - function: str = "text-denormalization" + function: str = "text-embedding" input_type: str = DataType.TEXT - output_type: str = DataType.LABEL + output_type: str = DataType.TEXT - inputs_class: Type[TI] = TextDenormalizationInputs - outputs_class: Type[TO] = TextDenormalizationOutputs + inputs_class: Type[TI] = TextEmbeddingInputs + outputs_class: Type[TO] = TextEmbeddingOutputs -class ImageContentModerationInputs(Inputs): - image: InputParam = None - min_confidence: InputParam = None +class FactCheckingInputs(Inputs): + language: InputParam = None + text: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) - self.min_confidence = self.create_param(code="min_confidence", data_type=DataType.TEXT, is_required=False) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=False) -class ImageContentModerationOutputs(Outputs): +class FactCheckingOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -3018,70 +3064,62 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class ImageContentModeration(AssetNode[ImageContentModerationInputs, ImageContentModerationOutputs]): +class FactChecking(AssetNode[FactCheckingInputs, FactCheckingOutputs]): """ - Image Content Moderation is a process that involves analyzing and filtering - images to detect and manage inappropriate, harmful, or sensitive content, - ensuring compliance with community guidelines and legal standards. + Fact Checking is the process of verifying the accuracy and truthfulness of +information, statements, or claims by cross-referencing with reliable sources +and evidence. 
- InputType: image - OutputType: label + InputType: text + OutputType: label """ - - function: str = "image-content-moderation" - input_type: str = DataType.IMAGE + function: str = "fact-checking" + input_type: str = DataType.TEXT output_type: str = DataType.LABEL - inputs_class: Type[TI] = ImageContentModerationInputs - outputs_class: Type[TO] = ImageContentModerationOutputs + inputs_class: Type[TI] = FactCheckingInputs + outputs_class: Type[TO] = FactCheckingOutputs -class ReferencelessTextGenerationMetricDefaultInputs(Inputs): - hypotheses: InputParam = None - sources: InputParam = None - score_identifier: InputParam = None +class TextToAudioInputs(Inputs): + text: InputParam = None + language: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.TEXT, is_required=True) - self.sources = self.create_param(code="sources", data_type=DataType.TEXT, is_required=False) - self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) -class ReferencelessTextGenerationMetricDefaultOutputs(Outputs): +class TextToAudioOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.AUDIO) -class ReferencelessTextGenerationMetricDefault( - BaseMetric[ReferencelessTextGenerationMetricDefaultInputs, ReferencelessTextGenerationMetricDefaultOutputs] -): +class TextToAudio(AssetNode[TextToAudioInputs, TextToAudioOutputs]): """ - The Referenceless Text Generation Metric Default is a function designed to - evaluate the quality of generated text without relying on reference texts for - comparison. + The Text to Audio function converts written text into spoken words, allowing +users to listen to the content instead of reading it. 
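# Each node class pins a function code plus an input_type and output_type,
# e.g. fact-checking is TEXT -> LABEL and text-to-audio is TEXT -> AUDIO. One
# concrete use for those declarations is rejecting ill-typed links before a
# pipeline runs; the check below is illustrative, and the SDK may validate
# links differently:

def can_link(upstream_output_type: str, downstream_input_type: str) -> bool:
    # A link is well-typed when the producer's output matches the consumer's input.
    return upstream_output_type == downstream_input_type


assert can_link("text", "text")       # e.g. text-embedding feeding fact-checking
assert not can_link("audio", "text")  # e.g. text-to-audio cannot feed fact-checking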
- InputType: text - OutputType: text + InputType: text + OutputType: audio """ - - function: str = "referenceless-text-generation-metric-default" + function: str = "text-to-audio" input_type: str = DataType.TEXT - output_type: str = DataType.TEXT + output_type: str = DataType.AUDIO - inputs_class: Type[TI] = ReferencelessTextGenerationMetricDefaultInputs - outputs_class: Type[TO] = ReferencelessTextGenerationMetricDefaultOutputs + inputs_class: Type[TI] = TextToAudioInputs + outputs_class: Type[TO] = TextToAudioOutputs -class NamedEntityRecognitionInputs(Inputs): +class FillTextMaskInputs(Inputs): text: InputParam = None language: InputParam = None dialect: InputParam = None script: InputParam = None - domain: InputParam = None def __init__(self, node=None): super().__init__(node=node) @@ -3089,192 +3127,203 @@ def __init__(self, node=None): self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) - self.domain = self.create_param(code="domain", data_type=DataType.LABEL, is_required=False) -class NamedEntityRecognitionOutputs(Outputs): +class FillTextMaskOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class NamedEntityRecognition(AssetNode[NamedEntityRecognitionInputs, NamedEntityRecognitionOutputs]): +class FillTextMask(AssetNode[FillTextMaskInputs, FillTextMaskOutputs]): """ - Named Entity Recognition (NER) is a natural language processing task that - involves identifying and classifying proper nouns in text into predefined - categories such as names of people, organizations, locations, dates, and other - entities. + Completes missing parts of a text based on the context, ideal for content +generation or data augmentation tasks. 
- InputType: text - OutputType: label + InputType: text + OutputType: text """ - - function: str = "named-entity-recognition" + function: str = "fill-text-mask" input_type: str = DataType.TEXT - output_type: str = DataType.LABEL + output_type: str = DataType.TEXT - inputs_class: Type[TI] = NamedEntityRecognitionInputs - outputs_class: Type[TO] = NamedEntityRecognitionOutputs + inputs_class: Type[TI] = FillTextMaskInputs + outputs_class: Type[TO] = FillTextMaskOutputs -class TextContentModerationInputs(Inputs): +class VoiceCloningInputs(Inputs): text: InputParam = None + audio: InputParam = None language: InputParam = None dialect: InputParam = None + voice: InputParam = None script: InputParam = None + type: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.voice = self.create_param(code="voice", data_type=DataType.LABEL, is_required=False) self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.type = self.create_param(code="type", data_type=DataType.LABEL, is_required=False) -class TextContentModerationOutputs(Outputs): +class VoiceCloningOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.data = self.create_param(code="data", data_type=DataType.AUDIO) -class TextContentModeration(AssetNode[TextContentModerationInputs, TextContentModerationOutputs]): +class VoiceCloning(AssetNode[VoiceCloningInputs, VoiceCloningOutputs]): """ - Text Content Moderation is the process of reviewing, filtering, and managing - user-generated content to ensure it adheres to community guidelines, legal - standards, and platform policies, thereby maintaining a safe and respectful - online environment. + Replicates a person's voice based on a sample, allowing for the generation of +speech in that person's tone and style. Used cautiously due to ethical +considerations. 
- InputType: text - OutputType: label + InputType: text + OutputType: audio """ - - function: str = "text-content-moderation" + function: str = "voice-cloning" input_type: str = DataType.TEXT - output_type: str = DataType.LABEL + output_type: str = DataType.AUDIO - inputs_class: Type[TI] = TextContentModerationInputs - outputs_class: Type[TO] = TextContentModerationOutputs + inputs_class: Type[TI] = VoiceCloningInputs + outputs_class: Type[TO] = VoiceCloningOutputs -class SpeakerDiarizationVideoInputs(Inputs): - video: InputParam = None +class DiacritizationInputs(Inputs): language: InputParam = None - script: InputParam = None dialect: InputParam = None + script: InputParam = None + text: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=True) self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) - self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) -class SpeakerDiarizationVideoOutputs(Outputs): +class DiacritizationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.VIDEO) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class SpeakerDiarizationVideo(AssetNode[SpeakerDiarizationVideoInputs, SpeakerDiarizationVideoOutputs]): +class Diacritization(AssetNode[DiacritizationInputs, DiacritizationOutputs]): """ - The Speaker Diarization Video function identifies and segments different - speakers in a video, attributing portions of the audio to individual speakers - to facilitate analysis and understanding of multi-speaker conversations. + Adds diacritical marks to text, essential for languages where meaning can +change based on diacritics. 
- InputType: video - OutputType: label + InputType: text + OutputType: text """ + function: str = "diacritization" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT - function: str = "speaker-diarization-video" - input_type: str = DataType.VIDEO - output_type: str = DataType.LABEL - - inputs_class: Type[TI] = SpeakerDiarizationVideoInputs - outputs_class: Type[TO] = SpeakerDiarizationVideoOutputs + inputs_class: Type[TI] = DiacritizationInputs + outputs_class: Type[TO] = DiacritizationOutputs -class SplitOnSilenceInputs(Inputs): - audio: InputParam = None +class SpeechTranslationInputs(Inputs): + source_audio: InputParam = None + sourcelanguage: InputParam = None + targetlanguage: InputParam = None + dialect: InputParam = None + voice: InputParam = None + script: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) + self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) + self.sourcelanguage = self.create_param(code="sourcelanguage", data_type=DataType.LABEL, is_required=True) + self.targetlanguage = self.create_param(code="targetlanguage", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.voice = self.create_param(code="voice", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class SplitOnSilenceOutputs(Outputs): +class SpeechTranslationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.AUDIO) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class SplitOnSilence(AssetNode[SplitOnSilenceInputs, SplitOnSilenceOutputs]): +class SpeechTranslation(AssetNode[SpeechTranslationInputs, SpeechTranslationOutputs]): """ - The "Split On Silence" function divides an audio recording into separate - segments based on periods of silence, allowing for easier editing and analysis - of individual sections. + Speech Translation is a technology that converts spoken language in real-time +from one language to another, enabling seamless communication between speakers +of different languages. 
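# Speech Translation above takes source_audio plus source/target language
# labels and emits text, conceptually composing recognition with translation.
# A stub-level sketch of that composition; both stages are fakes standing in
# for real models:

def transcribe(audio: bytes, language: str) -> str:
    return "hola mundo"  # stub: a real ASR model would run here


def translate(text: str, source: str, target: str) -> str:
    return {"hola mundo": "hello world"}.get(text, text)  # stub MT lookup


def speech_translation(audio: bytes, sourcelanguage: str, targetlanguage: str) -> str:
    return translate(transcribe(audio, sourcelanguage), sourcelanguage, targetlanguage)


print(speech_translation(b"...", "es", "en"))  # hello world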
- InputType: audio - OutputType: audio + InputType: audio + OutputType: text """ - - function: str = "split-on-silence" + function: str = "speech-translation" input_type: str = DataType.AUDIO - output_type: str = DataType.AUDIO + output_type: str = DataType.TEXT - inputs_class: Type[TI] = SplitOnSilenceInputs - outputs_class: Type[TO] = SplitOnSilenceOutputs + inputs_class: Type[TI] = SpeechTranslationInputs + outputs_class: Type[TO] = SpeechTranslationOutputs -class EmotionDetectionInputs(Inputs): - text: InputParam = None +class SpeechSynthesisInputs(Inputs): + audio: InputParam = None language: InputParam = None dialect: InputParam = None + voice: InputParam = None script: InputParam = None + text: InputParam = None + type: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=False) self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.voice = self.create_param(code="voice", data_type=DataType.LABEL, is_required=False) self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.type = self.create_param(code="type", data_type=DataType.LABEL, is_required=False) -class EmotionDetectionOutputs(Outputs): +class SpeechSynthesisOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.data = self.create_param(code="data", data_type=DataType.AUDIO) -class EmotionDetection(AssetNode[EmotionDetectionInputs, EmotionDetectionOutputs]): +class SpeechSynthesis(AssetNode[SpeechSynthesisInputs, SpeechSynthesisOutputs]): """ - Emotion Detection is a process that involves analyzing text to identify and - categorize the emotional states or sentiments expressed by individuals, such as - happiness, sadness, anger, or fear. + Generates human-like speech from written text. Ideal for text-to-speech +applications, audiobooks, and voice assistants. 
- InputType: text - OutputType: label + InputType: text + OutputType: audio """ - - function: str = "emotion-detection" + function: str = "speech-synthesis" input_type: str = DataType.TEXT - output_type: str = DataType.LABEL + output_type: str = DataType.AUDIO - inputs_class: Type[TI] = EmotionDetectionInputs - outputs_class: Type[TO] = EmotionDetectionOutputs + inputs_class: Type[TI] = SpeechSynthesisInputs + outputs_class: Type[TO] = SpeechSynthesisOutputs -class TextSpamDetectionInputs(Inputs): +class TextContentModerationInputs(Inputs): text: InputParam = None language: InputParam = None dialect: InputParam = None @@ -3288,7 +3337,7 @@ def __init__(self, node=None): self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class TextSpamDetectionOutputs(Outputs): +class TextContentModerationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -3296,48 +3345,39 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class TextSpamDetection(AssetNode[TextSpamDetectionInputs, TextSpamDetectionOutputs]): +class TextContentModeration(AssetNode[TextContentModerationInputs, TextContentModerationOutputs]): """ - Text Spam Detection is a process that involves analyzing and identifying - unsolicited or irrelevant messages within text communications, typically using - algorithms and machine learning techniques to filter out spam and ensure the - integrity of the communication platform. + Scans and identifies potentially harmful, offensive, or inappropriate textual +content, ensuring safer user environments. - InputType: text - OutputType: label + InputType: text + OutputType: label """ - - function: str = "text-spam-detection" + function: str = "text-content-moderation" input_type: str = DataType.TEXT output_type: str = DataType.LABEL - inputs_class: Type[TI] = TextSpamDetectionInputs - outputs_class: Type[TO] = TextSpamDetectionOutputs + inputs_class: Type[TI] = TextContentModerationInputs + outputs_class: Type[TO] = TextContentModerationOutputs -class TranslationInputs(Inputs): +class SubtitlingTranslationInputs(Inputs): text: InputParam = None sourcelanguage: InputParam = None - targetlanguage: InputParam = None - script_in: InputParam = None - script_out: InputParam = None dialect_in: InputParam = None - dialect_out: InputParam = None - context: InputParam = None + target_supplier: InputParam = None + targetlanguages: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) self.sourcelanguage = self.create_param(code="sourcelanguage", data_type=DataType.LABEL, is_required=True) - self.targetlanguage = self.create_param(code="targetlanguage", data_type=DataType.LABEL, is_required=True) - self.script_in = self.create_param(code="script_in", data_type=DataType.LABEL, is_required=False) - self.script_out = self.create_param(code="script_out", data_type=DataType.LABEL, is_required=False) self.dialect_in = self.create_param(code="dialect_in", data_type=DataType.LABEL, is_required=False) - self.dialect_out = self.create_param(code="dialect_out", data_type=DataType.LABEL, is_required=False) - self.context = self.create_param(code="context", data_type=DataType.LABEL, is_required=False) + self.target_supplier = self.create_param(code="target_supplier", data_type=DataType.LABEL, is_required=False) + self.targetlanguages = self.create_param(code="targetlanguages", data_type=DataType.LABEL, 
is_required=False) -class TranslationOutputs(Outputs): +class SubtitlingTranslationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -3345,83 +3385,79 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class Translation(AssetNode[TranslationInputs, TranslationOutputs]): +class SubtitlingTranslation(AssetNode[SubtitlingTranslationInputs, SubtitlingTranslationOutputs]): """ - Translation is the process of converting text from one language into an - equivalent text in another language, preserving the original meaning and - context. + Converts the text of subtitles from one language to another, ensuring context +and cultural nuances are maintained. Essential for global content distribution. - InputType: text - OutputType: text + InputType: text + OutputType: text """ - - function: str = "translation" + function: str = "subtitling-translation" input_type: str = DataType.TEXT output_type: str = DataType.TEXT - inputs_class: Type[TI] = TranslationInputs - outputs_class: Type[TO] = TranslationOutputs + inputs_class: Type[TI] = SubtitlingTranslationInputs + outputs_class: Type[TO] = SubtitlingTranslationOutputs -class VoiceActivityDetectionInputs(Inputs): - audio: InputParam = None - onset: InputParam = None - offset: InputParam = None - min_duration_on: InputParam = None - min_duration_off: InputParam = None +class AudioTranscriptAnalysisInputs(Inputs): + language: InputParam = None + dialect: InputParam = None + source_supplier: InputParam = None + source_audio: InputParam = None + script: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) - self.onset = self.create_param(code="onset", data_type=DataType.TEXT, is_required=False) - self.offset = self.create_param(code="offset", data_type=DataType.TEXT, is_required=False) - self.min_duration_on = self.create_param(code="min_duration_on", data_type=DataType.TEXT, is_required=False) - self.min_duration_off = self.create_param(code="min_duration_off", data_type=DataType.TEXT, is_required=False) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.source_supplier = self.create_param(code="source_supplier", data_type=DataType.LABEL, is_required=False) + self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class VoiceActivityDetectionOutputs(Outputs): +class AudioTranscriptAnalysisOutputs(Outputs): data: OutputParam = None - audio: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.AUDIO) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class VoiceActivityDetection(BaseSegmentor[VoiceActivityDetectionInputs, VoiceActivityDetectionOutputs]): +class AudioTranscriptAnalysis(AssetNode[AudioTranscriptAnalysisInputs, AudioTranscriptAnalysisOutputs]): """ - Voice Activity Detection (VAD) is a technology that identifies the presence or - absence of human speech within an audio signal, enabling systems to distinguish - between spoken words and background noise. 
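# The node classes in this file are all parametrized as
# AssetNode[SomeInputs, SomeOutputs] and carry matching inputs_class /
# outputs_class attributes. A compact standalone sketch of that generic
# pattern; the SDK's real AssetNode does considerably more:

from typing import Generic, Type, TypeVar

TI = TypeVar("TI")
TO = TypeVar("TO")


class AssetNode(Generic[TI, TO]):
    inputs_class: Type[TI]
    outputs_class: Type[TO]

    def __init__(self):
        # Instantiate the declared input/output containers for this node.
        self.inputs: TI = self.inputs_class()
        self.outputs: TO = self.outputs_class()


class DemoInputs:
    pass


class DemoOutputs:
    pass


class DemoNode(AssetNode[DemoInputs, DemoOutputs]):
    inputs_class = DemoInputs
    outputs_class = DemoOutputs


node = DemoNode()
assert isinstance(node.inputs, DemoInputs) and isinstance(node.outputs, DemoOutputs)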
+ Analyzes transcribed audio data for insights, patterns, or specific information +extraction. - InputType: audio - OutputType: audio + InputType: audio + OutputType: text """ - - function: str = "voice-activity-detection" + function: str = "audio-transcript-analysis" input_type: str = DataType.AUDIO - output_type: str = DataType.AUDIO + output_type: str = DataType.TEXT - inputs_class: Type[TI] = VoiceActivityDetectionInputs - outputs_class: Type[TO] = VoiceActivityDetectionOutputs + inputs_class: Type[TI] = AudioTranscriptAnalysisInputs + outputs_class: Type[TO] = AudioTranscriptAnalysisOutputs -class SpeechEmbeddingInputs(Inputs): - audio: InputParam = None +class TextGenerationInputs(Inputs): + text: InputParam = None + prompt: InputParam = None + context: InputParam = None language: InputParam = None - dialect: InputParam = None script: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.prompt = self.create_param(code="prompt", data_type=DataType.TEXT, is_required=False) + self.context = self.create_param(code="context", data_type=DataType.TEXT, is_required=False) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class SpeechEmbeddingOutputs(Outputs): +class TextGenerationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -3429,107 +3465,100 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class SpeechEmbedding(AssetNode[SpeechEmbeddingInputs, SpeechEmbeddingOutputs]): +class TextGeneration(AssetNode[TextGenerationInputs, TextGenerationOutputs]): """ - Speech Embedding is a process that transforms spoken language into a fixed- - dimensional vector representation, capturing essential features and - characteristics of the speech for tasks such as recognition, classification, - and analysis. + Creates coherent and contextually relevant textual content based on prompts or +certain parameters. Useful for chatbots, content creation, and data +augmentation. 
- InputType: audio - OutputType: text + InputType: text + OutputType: text """ - - function: str = "speech-embedding" - input_type: str = DataType.AUDIO + function: str = "text-generation" + input_type: str = DataType.TEXT output_type: str = DataType.TEXT - inputs_class: Type[TI] = SpeechEmbeddingInputs - outputs_class: Type[TO] = SpeechEmbeddingOutputs + inputs_class: Type[TI] = TextGenerationInputs + outputs_class: Type[TO] = TextGenerationOutputs -class SubtitlingTranslationInputs(Inputs): +class TextNormalizationInputs(Inputs): text: InputParam = None - sourcelanguage: InputParam = None - dialect_in: InputParam = None - target_supplier: InputParam = None - targetlanguages: InputParam = None + language: InputParam = None + settings: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - self.sourcelanguage = self.create_param(code="sourcelanguage", data_type=DataType.LABEL, is_required=True) - self.dialect_in = self.create_param(code="dialect_in", data_type=DataType.LABEL, is_required=False) - self.target_supplier = self.create_param(code="target_supplier", data_type=DataType.LABEL, is_required=False) - self.targetlanguages = self.create_param(code="targetlanguages", data_type=DataType.LABEL, is_required=False) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) + self.settings = self.create_param(code="settings", data_type=DataType.TEXT, is_required=False) -class SubtitlingTranslationOutputs(Outputs): +class TextNormalizationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class SubtitlingTranslation(AssetNode[SubtitlingTranslationInputs, SubtitlingTranslationOutputs]): +class TextNormalization(AssetNode[TextNormalizationInputs, TextNormalizationOutputs]): """ - Subtitling Translation is the process of converting spoken dialogue from one - language into written text in another language, which is then displayed on- - screen to aid viewers in understanding the content. + Converts unstructured or non-standard textual data into a more readable and +uniform format, dealing with abbreviations, numerals, and other non-standard +words. 
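# The Text Normalization docstring above describes expanding abbreviations and
# numerals into readable words. A toy expansion table makes the transformation
# concrete; a real normalizer is far more involved than this:

EXPANSIONS = {"dr.": "doctor", "st.": "street", "3": "three"}


def normalize(text: str) -> str:
    return " ".join(EXPANSIONS.get(token.lower(), token) for token in text.split())


print(normalize("Dr. Smith lives at 3 Elm St."))  # doctor Smith lives at three Elm street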
- InputType: text - OutputType: text + InputType: text + OutputType: label """ - - function: str = "subtitling-translation" + function: str = "text-normalization" input_type: str = DataType.TEXT - output_type: str = DataType.TEXT + output_type: str = DataType.LABEL - inputs_class: Type[TI] = SubtitlingTranslationInputs - outputs_class: Type[TO] = SubtitlingTranslationOutputs + inputs_class: Type[TI] = TextNormalizationInputs + outputs_class: Type[TO] = TextNormalizationOutputs -class TextGenerationInputs(Inputs): - text: InputParam = None - prompt: InputParam = None - context: InputParam = None - language: InputParam = None - script: InputParam = None +class VoiceActivityDetectionInputs(Inputs): + audio: InputParam = None + onset: InputParam = None + offset: InputParam = None + min_duration_on: InputParam = None + min_duration_off: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - self.prompt = self.create_param(code="prompt", data_type=DataType.TEXT, is_required=False) - self.context = self.create_param(code="context", data_type=DataType.TEXT, is_required=False) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) - self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) + self.onset = self.create_param(code="onset", data_type=DataType.TEXT, is_required=False) + self.offset = self.create_param(code="offset", data_type=DataType.TEXT, is_required=False) + self.min_duration_on = self.create_param(code="min_duration_on", data_type=DataType.TEXT, is_required=False) + self.min_duration_off = self.create_param(code="min_duration_off", data_type=DataType.TEXT, is_required=False) -class TextGenerationOutputs(Outputs): +class VoiceActivityDetectionOutputs(Outputs): data: OutputParam = None + audio: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.AUDIO) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO) -class TextGeneration(AssetNode[TextGenerationInputs, TextGenerationOutputs]): +class VoiceActivityDetection(BaseSegmentor[VoiceActivityDetectionInputs, VoiceActivityDetectionOutputs]): """ - Text Generation is a process in which artificial intelligence models, such as - neural networks, produce coherent and contextually relevant text based on a - given input or prompt, often mimicking human writing styles and patterns. + Determines when a person is speaking in an audio clip. It's an essential +preprocessing step for other audio-related tasks. 
- InputType: text - OutputType: text + InputType: audio + OutputType: audio """ + function: str = "voice-activity-detection" + input_type: str = DataType.AUDIO + output_type: str = DataType.AUDIO - function: str = "text-generation" - input_type: str = DataType.TEXT - output_type: str = DataType.TEXT - - inputs_class: Type[TI] = TextGenerationInputs - outputs_class: Type[TO] = TextGenerationOutputs + inputs_class: Type[TI] = VoiceActivityDetectionInputs + outputs_class: Type[TO] = VoiceActivityDetectionOutputs class VideoUnderstandingInputs(Inputs): @@ -3558,14 +3587,13 @@ def __init__(self, node=None): class VideoUnderstanding(AssetNode[VideoUnderstandingInputs, VideoUnderstandingOutputs]): """ - Video Understanding is the process of analyzing and interpreting video content - to extract meaningful information, such as identifying objects, actions, - events, and contextual relationships within the footage. + Video Understanding is the process of analyzing and interpreting video content +to extract meaningful information, such as identifying objects, actions, +events, and contextual relationships within the footage. - InputType: video - OutputType: text + InputType: video + OutputType: text """ - function: str = "video-understanding" input_type: str = DataType.VIDEO output_type: str = DataType.TEXT @@ -3574,79 +3602,50 @@ class VideoUnderstanding(AssetNode[VideoUnderstandingInputs, VideoUnderstandingO outputs_class: Type[TO] = VideoUnderstandingOutputs -class TextToVideoGenerationInputs(Inputs): - text: InputParam = None - language: InputParam = None - - def __init__(self, node=None): - super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) - - -class TextToVideoGenerationOutputs(Outputs): - data: OutputParam = None - - def __init__(self, node=None): - super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.VIDEO) - - -class TextToVideoGeneration(AssetNode[TextToVideoGenerationInputs, TextToVideoGenerationOutputs]): - """ - Text To Video Generation is a process that converts written descriptions or - scripts into dynamic, visual video content using advanced algorithms and - artificial intelligence. 
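# Unlike the plain AssetNode classes, VoiceActivityDetection is a
# BaseSegmentor, and its Outputs expose both "data" and "audio" because the
# node cuts one clip into speech regions. A toy amplitude-threshold detector
# shows the idea; all names below are illustrative, not SDK API:

from typing import List, Tuple


def segment_on_activity(samples: List[float], threshold: float = 0.1) -> List[Tuple[int, int]]:
    """Return (start, end) sample-index pairs where |amplitude| exceeds the threshold."""
    segments, start = [], None
    for i, sample in enumerate(samples):
        if abs(sample) > threshold and start is None:
            start = i                    # entering a speech region
        elif abs(sample) <= threshold and start is not None:
            segments.append((start, i))  # leaving a speech region
            start = None
    if start is not None:
        segments.append((start, len(samples)))
    return segments


print(segment_on_activity([0.0, 0.5, 0.6, 0.0, 0.0, 0.4, 0.0]))  # [(1, 3), (5, 6)]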
- - InputType: text - OutputType: video - """ - - function: str = "text-to-video-generation" - input_type: str = DataType.TEXT - output_type: str = DataType.VIDEO - - inputs_class: Type[TI] = TextToVideoGenerationInputs - outputs_class: Type[TO] = TextToVideoGenerationOutputs - - -class TextNormalizationInputs(Inputs): +class TranslationInputs(Inputs): text: InputParam = None - language: InputParam = None - settings: InputParam = None + sourcelanguage: InputParam = None + targetlanguage: InputParam = None + script_in: InputParam = None + script_out: InputParam = None + dialect_in: InputParam = None + dialect_out: InputParam = None + context: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) - self.settings = self.create_param(code="settings", data_type=DataType.TEXT, is_required=False) + self.sourcelanguage = self.create_param(code="sourcelanguage", data_type=DataType.LABEL, is_required=True) + self.targetlanguage = self.create_param(code="targetlanguage", data_type=DataType.LABEL, is_required=True) + self.script_in = self.create_param(code="script_in", data_type=DataType.LABEL, is_required=False) + self.script_out = self.create_param(code="script_out", data_type=DataType.LABEL, is_required=False) + self.dialect_in = self.create_param(code="dialect_in", data_type=DataType.LABEL, is_required=False) + self.dialect_out = self.create_param(code="dialect_out", data_type=DataType.LABEL, is_required=False) + self.context = self.create_param(code="context", data_type=DataType.LABEL, is_required=False) -class TextNormalizationOutputs(Outputs): +class TranslationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class TextNormalization(AssetNode[TextNormalizationInputs, TextNormalizationOutputs]): +class Translation(AssetNode[TranslationInputs, TranslationOutputs]): """ - Text normalization is the process of transforming text into a standard, - consistent format by correcting spelling errors, converting all characters to a - uniform case, removing punctuation, and expanding abbreviations to improve the - text's readability and usability for further processing or analysis. + Converts text from one language to another while maintaining the original +message's essence and context. Crucial for global communication. - InputType: text - OutputType: label + InputType: text + OutputType: text """ - - function: str = "text-normalization" + function: str = "translation" input_type: str = DataType.TEXT - output_type: str = DataType.LABEL + output_type: str = DataType.TEXT - inputs_class: Type[TI] = TextNormalizationInputs - outputs_class: Type[TO] = TextNormalizationOutputs + inputs_class: Type[TI] = TranslationInputs + outputs_class: Type[TO] = TranslationOutputs class SpeechRecognitionInputs(Inputs): @@ -3675,13 +3674,12 @@ def __init__(self, node=None): class SpeechRecognition(AssetNode[SpeechRecognitionInputs, SpeechRecognitionOutputs]): """ - Speech recognition is a technology that enables a computer or device to - identify and process spoken language, converting it into text. + Converts spoken language into written text. 
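# TranslationInputs above declares three required parameters (text,
# sourcelanguage, targetlanguage) and five optional ones. A run-time payload
# for such a node can be checked in a few lines; this helper is illustrative
# and not part of the SDK:

REQUIRED = {"text", "sourcelanguage", "targetlanguage"}
OPTIONAL = {"script_in", "script_out", "dialect_in", "dialect_out", "context"}


def validate_translation_payload(payload: dict) -> None:
    missing = REQUIRED - payload.keys()
    unknown = payload.keys() - REQUIRED - OPTIONAL
    if missing:
        raise ValueError(f"missing required params: {sorted(missing)}")
    if unknown:
        raise ValueError(f"unknown params: {sorted(unknown)}")


validate_translation_payload({"text": "Olá!", "sourcelanguage": "pt", "targetlanguage": "en"})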
Useful for transcription services, +voice assistants, and applications requiring voice-to-text capabilities. - InputType: audio - OutputType: text + InputType: audio + OutputType: text """ - function: str = "speech-recognition" input_type: str = DataType.AUDIO output_type: str = DataType.TEXT @@ -3718,15 +3716,12 @@ def __init__(self, node=None): class Subtitling(AssetNode[SubtitlingInputs, SubtitlingOutputs]): """ - Subtitling is the process of displaying written text on a screen to represent - the spoken dialogue, narration, or other audio elements in a video, typically - to aid viewers who are deaf or hard of hearing, or to provide translations for - audiences who speak different languages. + Generates accurate subtitles for videos, enhancing accessibility for diverse +audiences. - InputType: audio - OutputType: text + InputType: audio + OutputType: text """ - function: str = "subtitling" input_type: str = DataType.AUDIO output_type: str = DataType.TEXT @@ -3735,927 +3730,800 @@ class Subtitling(AssetNode[SubtitlingInputs, SubtitlingOutputs]): outputs_class: Type[TO] = SubtitlingOutputs -class ClassificationMetricInputs(Inputs): - hypotheses: InputParam = None - references: InputParam = None - lowerIsBetter: InputParam = None - sources: InputParam = None - score_identifier: InputParam = None - - def __init__(self, node=None): - super().__init__(node=node) - self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.LABEL, is_required=True) - self.references = self.create_param(code="references", data_type=DataType.LABEL, is_required=True) - self.lowerIsBetter = self.create_param(code="lowerIsBetter", data_type=DataType.TEXT, is_required=False) - self.sources = self.create_param(code="sources", data_type=DataType.TEXT, is_required=False) - self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True) - - -class ClassificationMetricOutputs(Outputs): - data: OutputParam = None - - def __init__(self, node=None): - super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.NUMBER) - - -class ClassificationMetric(BaseMetric[ClassificationMetricInputs, ClassificationMetricOutputs]): - """ - A Classification Metric is a quantitative measure used to evaluate the quality - and effectiveness of classification models. - - InputType: text - OutputType: text - """ - - function: str = "classification-metric" - input_type: str = DataType.TEXT - output_type: str = DataType.TEXT - - inputs_class: Type[TI] = ClassificationMetricInputs - outputs_class: Type[TO] = ClassificationMetricOutputs - - -class TextToImageGenerationInputs(Inputs): - text: InputParam = None - - def __init__(self, node=None): - super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - - -class TextToImageGenerationOutputs(Outputs): - data: OutputParam = None - - def __init__(self, node=None): - super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.IMAGE) - - -class TextToImageGeneration(AssetNode[TextToImageGenerationInputs, TextToImageGenerationOutputs]): - """ - Text To Image Generation is a process where a system creates visual images - based on descriptive text input, translating written language into - corresponding graphical representations. 
- - InputType: text - OutputType: image - """ - - function: str = "text-to-image-generation" - input_type: str = DataType.TEXT - output_type: str = DataType.IMAGE - - inputs_class: Type[TI] = TextToImageGenerationInputs - outputs_class: Type[TO] = TextToImageGenerationOutputs - class Pipeline(DefaultPipeline): + def object_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ObjectDetection: """ - Object Detection is a computer vision technology that identifies and locates - objects within an image, typically by drawing bounding boxes around the - detected objects and classifying them into predefined categories. + Object Detection is a computer vision technology that identifies and locates +objects within an image, typically by drawing bounding boxes around the +detected objects and classifying them into predefined categories. """ return ObjectDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) def language_identification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> LanguageIdentification: """ - Language Identification is the process of automatically determining the - language in which a given piece of text is written. + Detects the language in which a given text is written, aiding in multilingual +platforms or content localization. """ return LanguageIdentification(*args, asset_id=asset_id, pipeline=self, **kwargs) - def ocr(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Ocr: + def depth_estimation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> DepthEstimation: """ - OCR, or Optical Character Recognition, is a technology that converts different - types of documents, such as scanned paper documents, PDFs, or images captured - by a digital camera, into editable and searchable data by recognizing and - extracting text from the images. + Depth estimation is a computational process that determines the distance of +objects from a viewpoint, typically using visual data from cameras or sensors +to create a three-dimensional understanding of a scene. """ - return Ocr(*args, asset_id=asset_id, pipeline=self, **kwargs) + return DepthEstimation(*args, asset_id=asset_id, pipeline=self, **kwargs) def script_execution(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ScriptExecution: """ - Script Execution refers to the process of running a set of programmed - instructions or code within a computing environment, enabling the automated - performance of tasks, calculations, or operations as defined by the script. + Script Execution refers to the process of running a set of programmed +instructions or code within a computing environment, enabling the automated +performance of tasks, calculations, or operations as defined by the script. """ return ScriptExecution(*args, asset_id=asset_id, pipeline=self, **kwargs) - def image_label_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageLabelDetection: + def image_embedding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageEmbedding: """ - Image Label Detection is a function that automatically identifies and assigns - descriptive tags or labels to objects, scenes, or elements within an image, - enabling easier categorization, search, and analysis of visual content. + Image Embedding is a process that transforms an image into a fixed-dimensional +vector representation, capturing its essential features and enabling efficient +comparison, retrieval, and analysis in various machine learning and computer +vision tasks. 
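# Every method on the regenerated Pipeline class has the same shape: it
# forwards *args/**kwargs plus asset_id and pipeline=self into the node
# constructor, so nodes are always created through the pipeline that owns
# them. A standalone sketch of that factory-method pattern; these are
# simplified stand-ins, not the SDK's actual classes:

from typing import List


class Node:
    def __init__(self, asset_id: str, pipeline: "MiniPipeline"):
        self.asset_id = asset_id
        self.pipeline = pipeline
        pipeline.nodes.append(self)  # the owning pipeline tracks its nodes


class MiniPipeline:
    def __init__(self):
        self.nodes: List[Node] = []

    def object_detection(self, asset_id: str) -> Node:
        # Mirrors: return ObjectDetection(asset_id=asset_id, pipeline=self)
        return Node(asset_id=asset_id, pipeline=self)


p = MiniPipeline()
node = p.object_detection(asset_id="dummy-asset-id")
assert p.nodes == [node]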
""" - return ImageLabelDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) + return ImageEmbedding(*args, asset_id=asset_id, pipeline=self, **kwargs) - def image_captioning(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageCaptioning: + def image_to_video_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageToVideoGeneration: """ - Image Captioning is a process that involves generating a textual description of - an image, typically using machine learning models to analyze the visual content - and produce coherent and contextually relevant sentences that describe the - objects, actions, and scenes depicted in the image. + The Image To Video Generation function transforms a series of static images +into a cohesive, dynamic video sequence, often incorporating transitions, +effects, and synchronization with audio to create a visually engaging +narrative. """ - return ImageCaptioning(*args, asset_id=asset_id, pipeline=self, **kwargs) + return ImageToVideoGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) - def audio_language_identification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioLanguageIdentification: + def image_impainting(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageImpainting: """ - Audio Language Identification is a process that involves analyzing an audio - recording to determine the language being spoken. + Image inpainting is a process that involves filling in missing or damaged parts +of an image in a way that is visually coherent and seamlessly blends with the +surrounding areas, often using advanced algorithms and techniques to restore +the image to its original or intended appearance. """ - return AudioLanguageIdentification(*args, asset_id=asset_id, pipeline=self, **kwargs) + return ImageImpainting(*args, asset_id=asset_id, pipeline=self, **kwargs) - def asr_age_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AsrAgeClassification: + def style_transfer(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> StyleTransfer: """ - The ASR Age Classification function is designed to analyze audio recordings of - speech to determine the speaker's age group by leveraging automatic speech - recognition (ASR) technology and machine learning algorithms. + Style Transfer is a technique in artificial intelligence that applies the +visual style of one image (such as the brushstrokes of a famous painting) to +the content of another image, effectively blending the artistic elements of the +first image with the subject matter of the second. """ - return AsrAgeClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) + return StyleTransfer(*args, asset_id=asset_id, pipeline=self, **kwargs) - def benchmark_scoring_mt(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> BenchmarkScoringMt: + def multi_class_text_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> MultiClassTextClassification: """ - Benchmark Scoring MT is a function designed to evaluate and score machine - translation systems by comparing their output against a set of predefined - benchmarks, thereby assessing their accuracy and performance. + Multi Class Text Classification is a natural language processing task that +involves categorizing a given text into one of several predefined classes or +categories based on its content. 
""" - return BenchmarkScoringMt(*args, asset_id=asset_id, pipeline=self, **kwargs) + return MultiClassTextClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) - def asr_gender_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AsrGenderClassification: + def part_of_speech_tagging(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> PartOfSpeechTagging: """ - The ASR Gender Classification function analyzes audio recordings to determine - and classify the speaker's gender based on their voice characteristics. + Part of Speech Tagging is a natural language processing task that involves +assigning each word in a sentence its corresponding part of speech, such as +noun, verb, adjective, or adverb, based on its role and context within the +sentence. """ - return AsrGenderClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) + return PartOfSpeechTagging(*args, asset_id=asset_id, pipeline=self, **kwargs) - def base_model(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> BaseModel: + def metric_aggregation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> MetricAggregation: """ - The Base-Model function serves as a foundational framework designed to provide - essential features and capabilities upon which more specialized or advanced - models can be built and customized. + Metric Aggregation is a function that computes and summarizes numerical data by +applying statistical operations, such as averaging, summing, or finding the +minimum and maximum values, to provide insights and facilitate analysis of +large datasets. """ - return BaseModel(*args, asset_id=asset_id, pipeline=self, **kwargs) + return MetricAggregation(*args, asset_id=asset_id, pipeline=self, **kwargs) - def language_identification_audio(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> LanguageIdentificationAudio: + def image_colorization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageColorization: """ - The Language Identification Audio function analyzes audio input to determine - and identify the language being spoken. + Image colorization is a process that involves adding color to grayscale images, +transforming them from black-and-white to full-color representations, often +using advanced algorithms and machine learning techniques to predict and apply +the appropriate hues and shades. """ - return LanguageIdentificationAudio(*args, asset_id=asset_id, pipeline=self, **kwargs) + return ImageColorization(*args, asset_id=asset_id, pipeline=self, **kwargs) - def loglikelihood(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Loglikelihood: + def intent_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> IntentClassification: """ - The Log Likelihood function measures the probability of observing the given - data under a specific statistical model by taking the natural logarithm of the - likelihood function, thereby transforming the product of probabilities into a - sum, which simplifies the process of optimization and parameter estimation. + Intent Classification is a natural language processing task that involves +analyzing and categorizing user text input to determine the underlying purpose +or goal behind the communication, such as booking a flight, asking for weather +information, or setting a reminder. 
""" - return Loglikelihood(*args, asset_id=asset_id, pipeline=self, **kwargs) + return IntentClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) - def video_embedding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoEmbedding: + def audio_intent_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioIntentDetection: """ - Video Embedding is a process that transforms video content into a fixed- - dimensional vector representation, capturing essential features and patterns to - facilitate tasks such as retrieval, classification, and recommendation. + Audio Intent Detection is a process that involves analyzing audio signals to +identify and interpret the underlying intentions or purposes behind spoken +words, enabling systems to understand and respond appropriately to human +speech. """ - return VideoEmbedding(*args, asset_id=asset_id, pipeline=self, **kwargs) + return AudioIntentDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) - def text_segmenation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextSegmenation: + def asr_quality_estimation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AsrQualityEstimation: """ - Text Segmentation is the process of dividing a continuous text into meaningful - units, such as words, sentences, or topics, to facilitate easier analysis and - understanding. + ASR Quality Estimation is a process that evaluates the accuracy and reliability +of automatic speech recognition systems by analyzing their performance in +transcribing spoken language into text. """ - return TextSegmenation(*args, asset_id=asset_id, pipeline=self, **kwargs) + return AsrQualityEstimation(*args, asset_id=asset_id, pipeline=self, **kwargs) - def image_embedding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageEmbedding: + def search(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Search: """ - Image Embedding is a process that transforms an image into a fixed-dimensional - vector representation, capturing its essential features and enabling efficient - comparison, retrieval, and analysis in various machine learning and computer - vision tasks. + An algorithm that identifies and returns data or items that match particular +keywords or conditions from a dataset. A fundamental tool for databases and +websites. """ - return ImageEmbedding(*args, asset_id=asset_id, pipeline=self, **kwargs) + return Search(*args, asset_id=asset_id, pipeline=self, **kwargs) - def image_manipulation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageManipulation: + def viseme_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VisemeGeneration: """ - Image Manipulation refers to the process of altering or enhancing digital - images using various techniques and tools to achieve desired visual effects, - correct imperfections, or transform the image's appearance. + Viseme Generation is the process of creating visual representations of +phonemes, which are the distinct units of sound in speech, to synchronize lip +movements with spoken words in animations or virtual avatars. 
""" - return ImageManipulation(*args, asset_id=asset_id, pipeline=self, **kwargs) + return VisemeGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) - def image_to_video_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageToVideoGeneration: + def ocr(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Ocr: """ - The Image To Video Generation function transforms a series of static images - into a cohesive, dynamic video sequence, often incorporating transitions, - effects, and synchronization with audio to create a visually engaging - narrative. + Converts images of typed, handwritten, or printed text into machine-encoded +text. Used in digitizing printed texts for data retrieval. """ - return ImageToVideoGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) + return Ocr(*args, asset_id=asset_id, pipeline=self, **kwargs) - def audio_forced_alignment(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioForcedAlignment: + def loglikelihood(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Loglikelihood: """ - Audio Forced Alignment is a process that synchronizes a given audio recording - with its corresponding transcript by precisely aligning each spoken word or - phoneme to its exact timing within the audio. + The Log Likelihood function measures the probability of observing the given +data under a specific statistical model by taking the natural logarithm of the +likelihood function, thereby transforming the product of probabilities into a +sum, which simplifies the process of optimization and parameter estimation. """ - return AudioForcedAlignment(*args, asset_id=asset_id, pipeline=self, **kwargs) + return Loglikelihood(*args, asset_id=asset_id, pipeline=self, **kwargs) - def benchmark_scoring_asr(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> BenchmarkScoringAsr: + def video_embedding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoEmbedding: """ - Benchmark Scoring ASR is a function that evaluates and compares the performance - of automatic speech recognition systems by analyzing their accuracy, speed, and - other relevant metrics against a standardized set of benchmarks. + Video Embedding is a process that transforms video content into a fixed- +dimensional vector representation, capturing essential features and patterns to +facilitate tasks such as retrieval, classification, and recommendation. """ - return BenchmarkScoringAsr(*args, asset_id=asset_id, pipeline=self, **kwargs) + return VideoEmbedding(*args, asset_id=asset_id, pipeline=self, **kwargs) - def visual_question_answering(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VisualQuestionAnswering: + def text_segmenation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextSegmenation: """ - Visual Question Answering (VQA) is a task in artificial intelligence that - involves analyzing an image and providing accurate, contextually relevant - answers to questions posed about the visual content of that image. + Text Segmentation is the process of dividing a continuous text into meaningful +units, such as words, sentences, or topics, to facilitate easier analysis and +understanding. 
""" - return VisualQuestionAnswering(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TextSegmenation(*args, asset_id=asset_id, pipeline=self, **kwargs) - def document_image_parsing(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> DocumentImageParsing: + def expression_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ExpressionDetection: """ - Document Image Parsing is the process of analyzing and converting scanned or - photographed images of documents into structured, machine-readable formats by - identifying and extracting text, layout, and other relevant information. + Expression Detection is the process of identifying and analyzing facial +expressions to interpret emotions or intentions using AI and computer vision +techniques. """ - return DocumentImageParsing(*args, asset_id=asset_id, pipeline=self, **kwargs) + return ExpressionDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) - def document_information_extraction( - self, asset_id: Union[str, asset.Asset], *args, **kwargs - ) -> DocumentInformationExtraction: + def speech_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechClassification: """ - Document Information Extraction is the process of automatically identifying, - extracting, and structuring relevant data from unstructured or semi-structured - documents, such as invoices, receipts, contracts, and forms, to facilitate - easier data management and analysis. + Categorizes audio clips based on their content, aiding in content organization +and targeted actions. """ - return DocumentInformationExtraction(*args, asset_id=asset_id, pipeline=self, **kwargs) + return SpeechClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) - def depth_estimation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> DepthEstimation: + def inverse_text_normalization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> InverseTextNormalization: """ - Depth estimation is a computational process that determines the distance of - objects from a viewpoint, typically using visual data from cameras or sensors - to create a three-dimensional understanding of a scene. + Inverse Text Normalization is the process of converting spoken or written +language in its normalized form, such as numbers, dates, and abbreviations, +back into their original, more complex or detailed textual representations. """ - return DepthEstimation(*args, asset_id=asset_id, pipeline=self, **kwargs) + return InverseTextNormalization(*args, asset_id=asset_id, pipeline=self, **kwargs) - def video_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoGeneration: + def extract_audio_from_video(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ExtractAudioFromVideo: """ - Video Generation is the process of creating video content through automated or - semi-automated means, often utilizing algorithms, artificial intelligence, or - software tools to produce visual and audio elements that can range from simple - animations to complex, realistic scenes. + Isolates and extracts audio tracks from video files, aiding in audio analysis +or transcription tasks. 
""" - return VideoGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) + return ExtractAudioFromVideo(*args, asset_id=asset_id, pipeline=self, **kwargs) - def referenceless_audio_generation_metric( - self, asset_id: Union[str, asset.Asset], *args, **kwargs - ) -> ReferencelessAudioGenerationMetric: + def image_compression(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageCompression: """ - The Referenceless Audio Generation Metric is a tool designed to evaluate the - quality of generated audio content without the need for a reference or original - audio sample for comparison. + Reduces the size of image files without significantly compromising their visual +quality. Useful for optimizing storage and improving webpage load times. """ - return ReferencelessAudioGenerationMetric(*args, asset_id=asset_id, pipeline=self, **kwargs) + return ImageCompression(*args, asset_id=asset_id, pipeline=self, **kwargs) - def multi_class_image_classification( - self, asset_id: Union[str, asset.Asset], *args, **kwargs - ) -> MultiClassImageClassification: + def noise_removal(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> NoiseRemoval: """ - Multi Class Image Classification is a machine learning task where an algorithm - is trained to categorize images into one of several predefined classes or - categories based on their visual content. + Noise Removal is a process that involves identifying and eliminating unwanted +random variations or disturbances from an audio signal to enhance the clarity +and quality of the underlying information. """ - return MultiClassImageClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) + return NoiseRemoval(*args, asset_id=asset_id, pipeline=self, **kwargs) - def semantic_segmentation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SemanticSegmentation: + def text_summarization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextSummarization: """ - Semantic segmentation is a computer vision process that involves classifying - each pixel in an image into a predefined category, effectively partitioning the - image into meaningful segments based on the objects or regions they represent. + Extracts the main points from a larger body of text, producing a concise +summary without losing the primary message. """ - return SemanticSegmentation(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TextSummarization(*args, asset_id=asset_id, pipeline=self, **kwargs) - def instance_segmentation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> InstanceSegmentation: + def text_generation_metric(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextGenerationMetric: """ - Instance segmentation is a computer vision task that involves detecting and - delineating each distinct object within an image, assigning a unique label and - precise boundary to every individual instance of objects, even if they belong - to the same category. + A Text Generation Metric is a quantitative measure used to evaluate the quality +and effectiveness of text produced by natural language processing models, often +assessing aspects such as coherence, relevance, fluency, and adherence to given +prompts or instructions. 
""" - return InstanceSegmentation(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TextGenerationMetric(*args, asset_id=asset_id, pipeline=self, **kwargs) - def image_colorization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageColorization: + def image_captioning(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageCaptioning: """ - Image colorization is a process that involves adding color to grayscale images, - transforming them from black-and-white to full-color representations, often - using advanced algorithms and machine learning techniques to predict and apply - the appropriate hues and shades. + Image Captioning is a process that involves generating a textual description of +an image, typically using machine learning models to analyze the visual content +and produce coherent and contextually relevant sentences that describe the +objects, actions, and scenes depicted in the image. """ - return ImageColorization(*args, asset_id=asset_id, pipeline=self, **kwargs) + return ImageCaptioning(*args, asset_id=asset_id, pipeline=self, **kwargs) - def audio_generation_metric(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioGenerationMetric: + def benchmark_scoring_mt(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> BenchmarkScoringMt: """ - The Audio Generation Metric is a quantitative measure used to evaluate the - quality, accuracy, and overall performance of audio generated by artificial - intelligence systems, often considering factors such as fidelity, - intelligibility, and similarity to human-produced audio. + Benchmark Scoring MT is a function designed to evaluate and score machine +translation systems by comparing their output against a set of predefined +benchmarks, thereby assessing their accuracy and performance. """ - return AudioGenerationMetric(*args, asset_id=asset_id, pipeline=self, **kwargs) + return BenchmarkScoringMt(*args, asset_id=asset_id, pipeline=self, **kwargs) - def image_impainting(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageImpainting: + def speaker_diarization_audio(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeakerDiarizationAudio: """ - Image inpainting is a process that involves filling in missing or damaged parts - of an image in a way that is visually coherent and seamlessly blends with the - surrounding areas, often using advanced algorithms and techniques to restore - the image to its original or intended appearance. + Identifies individual speakers and their respective speech segments within an +audio clip. Ideal for multi-speaker recordings or conference calls. """ - return ImageImpainting(*args, asset_id=asset_id, pipeline=self, **kwargs) + return SpeakerDiarizationAudio(*args, asset_id=asset_id, pipeline=self, **kwargs) - def style_transfer(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> StyleTransfer: + def benchmark_scoring_asr(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> BenchmarkScoringAsr: """ - Style Transfer is a technique in artificial intelligence that applies the - visual style of one image (such as the brushstrokes of a famous painting) to - the content of another image, effectively blending the artistic elements of the - first image with the subject matter of the second. + Benchmark Scoring ASR is a function that evaluates and compares the performance +of automatic speech recognition systems by analyzing their accuracy, speed, and +other relevant metrics against a standardized set of benchmarks. 
""" - return StyleTransfer(*args, asset_id=asset_id, pipeline=self, **kwargs) + return BenchmarkScoringAsr(*args, asset_id=asset_id, pipeline=self, **kwargs) - def multi_class_text_classification( - self, asset_id: Union[str, asset.Asset], *args, **kwargs - ) -> MultiClassTextClassification: + def visual_question_answering(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VisualQuestionAnswering: """ - Multi Class Text Classification is a natural language processing task that - involves categorizing a given text into one of several predefined classes or - categories based on its content. + Visual Question Answering (VQA) is a task in artificial intelligence that +involves analyzing an image and providing accurate, contextually relevant +answers to questions posed about the visual content of that image. """ - return MultiClassTextClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) + return VisualQuestionAnswering(*args, asset_id=asset_id, pipeline=self, **kwargs) - def text_embedding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextEmbedding: + def document_image_parsing(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> DocumentImageParsing: """ - Text embedding is a process that converts text into numerical vectors, - capturing the semantic meaning and contextual relationships of words or - phrases, enabling machines to understand and analyze natural language more - effectively. + Document Image Parsing is the process of analyzing and converting scanned or +photographed images of documents into structured, machine-readable formats by +identifying and extracting text, layout, and other relevant information. """ - return TextEmbedding(*args, asset_id=asset_id, pipeline=self, **kwargs) + return DocumentImageParsing(*args, asset_id=asset_id, pipeline=self, **kwargs) - def multi_label_text_classification( - self, asset_id: Union[str, asset.Asset], *args, **kwargs - ) -> MultiLabelTextClassification: + def multi_label_text_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> MultiLabelTextClassification: """ - Multi Label Text Classification is a natural language processing task where a - given text is analyzed and assigned multiple relevant labels or categories from - a predefined set, allowing for the text to belong to more than one category - simultaneously. + Multi Label Text Classification is a natural language processing task where a +given text is analyzed and assigned multiple relevant labels or categories from +a predefined set, allowing for the text to belong to more than one category +simultaneously. """ return MultiLabelTextClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) def text_reconstruction(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextReconstruction: """ - Text Reconstruction is a process that involves piecing together fragmented or - incomplete text data to restore it to its original, coherent form. + Text Reconstruction is a process that involves piecing together fragmented or +incomplete text data to restore it to its original, coherent form. """ return TextReconstruction(*args, asset_id=asset_id, pipeline=self, **kwargs) - def fact_checking(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> FactChecking: + def video_content_moderation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoContentModeration: + """ + Automatically reviews video content to detect and possibly remove inappropriate +or harmful material. 
Essential for user-generated content platforms. + """ + return VideoContentModeration(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def multilingual_speech_recognition(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> MultilingualSpeechRecognition: + """ + Multilingual Speech Recognition is a technology that enables the automatic +transcription of spoken language into text across multiple languages, allowing +for seamless communication and understanding in diverse linguistic contexts. + """ + return MultilingualSpeechRecognition(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def entity_linking(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> EntityLinking: + """ + Associates identified entities in the text with specific entries in a knowledge +base or database. + """ + return EntityLinking(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def audio_reconstruction(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioReconstruction: + """ + Audio Reconstruction is the process of restoring or recreating audio signals +from incomplete, damaged, or degraded recordings to achieve a high-quality, +accurate representation of the original sound. + """ + return AudioReconstruction(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def audio_emotion_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioEmotionDetection: """ - Fact Checking is the process of verifying the accuracy and truthfulness of - information, statements, or claims by cross-referencing with reliable sources - and evidence. + Audio Emotion Detection is a technology that analyzes vocal characteristics and +patterns in audio recordings to identify and classify the emotional state of +the speaker. """ - return FactChecking(*args, asset_id=asset_id, pipeline=self, **kwargs) + return AudioEmotionDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) - def speech_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechClassification: + def split_on_linebreak(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SplitOnLinebreak: """ - Speech Classification is a process that involves analyzing and categorizing - spoken language into predefined categories or classes based on various features - such as tone, pitch, and linguistic content. + The "Split On Linebreak" function divides a given string into a list of +substrings, using linebreaks (newline characters) as the points of separation. """ - return SpeechClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) + return SplitOnLinebreak(*args, asset_id=asset_id, pipeline=self, **kwargs) - def intent_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> IntentClassification: + def keyword_spotting(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> KeywordSpotting: """ - Intent Classification is a natural language processing task that involves - analyzing and categorizing user text input to determine the underlying purpose - or goal behind the communication, such as booking a flight, asking for weather - information, or setting a reminder. + Keyword Spotting is a function that enables the detection and identification of +specific words or phrases within a stream of audio, often used in voice- +activated systems to trigger actions or commands based on recognized keywords. 
""" - return IntentClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) + return KeywordSpotting(*args, asset_id=asset_id, pipeline=self, **kwargs) - def part_of_speech_tagging(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> PartOfSpeechTagging: + def text_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextClassification: """ - Part of Speech Tagging is a natural language processing task that involves - assigning each word in a sentence its corresponding part of speech, such as - noun, verb, adjective, or adverb, based on its role and context within the - sentence. + Categorizes text into predefined groups or topics, facilitating content +organization and targeted actions. """ - return PartOfSpeechTagging(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TextClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) - def metric_aggregation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> MetricAggregation: + def offensive_language_identification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> OffensiveLanguageIdentification: """ - Metric Aggregation is a function that computes and summarizes numerical data by - applying statistical operations, such as averaging, summing, or finding the - minimum and maximum values, to provide insights and facilitate analysis of - large datasets. + Detects language or phrases that might be considered offensive, aiding in +content moderation and creating respectful user interactions. """ - return MetricAggregation(*args, asset_id=asset_id, pipeline=self, **kwargs) + return OffensiveLanguageIdentification(*args, asset_id=asset_id, pipeline=self, **kwargs) - def dialect_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> DialectDetection: + def speech_non_speech_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechNonSpeechClassification: """ - Dialect Detection is a function that identifies and classifies the specific - regional or social variations of a language spoken or written by an individual, - enabling the recognition of distinct linguistic patterns and nuances associated - with different dialects. + Differentiates between speech and non-speech audio segments. Great for editing +software and transcription services to exclude irrelevant audio. """ - return DialectDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) + return SpeechNonSpeechClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) - def inverse_text_normalization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> InverseTextNormalization: + def named_entity_recognition(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> NamedEntityRecognition: """ - Inverse Text Normalization is the process of converting spoken or written - language in its normalized form, such as numbers, dates, and abbreviations, - back into their original, more complex or detailed textual representations. + Identifies and classifies named entities (e.g., persons, organizations, +locations) within text. Useful for information extraction, content tagging, and +search enhancements. 
""" - return InverseTextNormalization(*args, asset_id=asset_id, pipeline=self, **kwargs) + return NamedEntityRecognition(*args, asset_id=asset_id, pipeline=self, **kwargs) - def text_to_audio(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextToAudio: + def image_manipulation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageManipulation: """ - The Text to Audio function converts written text into spoken words, allowing - users to listen to the content instead of reading it. + Image Manipulation refers to the process of altering or enhancing digital +images using various techniques and tools to achieve desired visual effects, +correct imperfections, or transform the image's appearance. """ - return TextToAudio(*args, asset_id=asset_id, pipeline=self, **kwargs) + return ImageManipulation(*args, asset_id=asset_id, pipeline=self, **kwargs) - def fill_text_mask(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> FillTextMask: + def split_on_silence(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SplitOnSilence: """ - The "Fill Text Mask" function takes a text input with masked or placeholder - characters and replaces those placeholders with specified or contextually - appropriate characters to generate a complete and coherent text output. + The "Split On Silence" function divides an audio recording into separate +segments based on periods of silence, allowing for easier editing and analysis +of individual sections. """ - return FillTextMask(*args, asset_id=asset_id, pipeline=self, **kwargs) + return SplitOnSilence(*args, asset_id=asset_id, pipeline=self, **kwargs) - def video_content_moderation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoContentModeration: + def text_to_video_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextToVideoGeneration: """ - Video Content Moderation is the process of reviewing, analyzing, and filtering - video content to ensure it adheres to community guidelines, legal standards, - and platform policies, thereby preventing the dissemination of inappropriate, - harmful, or illegal material. + Text To Video Generation is a process that converts written descriptions or +scripts into dynamic, visual video content using advanced algorithms and +artificial intelligence. """ - return VideoContentModeration(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TextToVideoGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) - def extract_audio_from_video(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ExtractAudioFromVideo: + def document_information_extraction(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> DocumentInformationExtraction: """ - The "Extract Audio From Video" function allows users to separate and save the - audio track from a video file, enabling them to obtain just the sound without - the accompanying visual content. + Document Information Extraction is the process of automatically identifying, +extracting, and structuring relevant data from unstructured or semi-structured +documents, such as invoices, receipts, contracts, and forms, to facilitate +easier data management and analysis. 
""" - return ExtractAudioFromVideo(*args, asset_id=asset_id, pipeline=self, **kwargs) + return DocumentInformationExtraction(*args, asset_id=asset_id, pipeline=self, **kwargs) - def image_compression(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageCompression: + def video_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoGeneration: """ - Image compression is a process that reduces the file size of an image by - removing redundant or non-essential data, while maintaining an acceptable level - of visual quality. + Produces video content based on specific inputs or datasets. Can be used for +simulations, animations, or even deepfake detection. """ - return ImageCompression(*args, asset_id=asset_id, pipeline=self, **kwargs) + return VideoGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) - def multilingual_speech_recognition( - self, asset_id: Union[str, asset.Asset], *args, **kwargs - ) -> MultilingualSpeechRecognition: + def text_to_image_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextToImageGeneration: """ - Multilingual Speech Recognition is a technology that enables the automatic - transcription of spoken language into text across multiple languages, allowing - for seamless communication and understanding in diverse linguistic contexts. + Creates a visual representation based on textual input, turning descriptions +into pictorial forms. Used in creative processes and content generation. """ - return MultilingualSpeechRecognition(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TextToImageGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) - def referenceless_text_generation_metric( - self, asset_id: Union[str, asset.Asset], *args, **kwargs - ) -> ReferencelessTextGenerationMetric: + def referenceless_text_generation_metric(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ReferencelessTextGenerationMetric: """ - The Referenceless Text Generation Metric is a method for evaluating the quality - of generated text without requiring a reference text for comparison, often - leveraging models or algorithms to assess coherence, relevance, and fluency - based on intrinsic properties of the text itself. + The Referenceless Text Generation Metric is a method for evaluating the quality +of generated text without requiring a reference text for comparison, often +leveraging models or algorithms to assess coherence, relevance, and fluency +based on intrinsic properties of the text itself. """ return ReferencelessTextGenerationMetric(*args, asset_id=asset_id, pipeline=self, **kwargs) - def text_generation_metric_default(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextGenerationMetricDefault: + def other__multipurpose_(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> OtherMultipurpose: """ - The "Text Generation Metric Default" function provides a standard set of - evaluation metrics for assessing the quality and performance of text generation - models. + The "Other (Multipurpose)" function serves as a versatile category designed to +accommodate a wide range of tasks and activities that do not fit neatly into +predefined classifications, offering flexibility and adaptability for various +needs. 
""" - return TextGenerationMetricDefault(*args, asset_id=asset_id, pipeline=self, **kwargs) + return OtherMultipurpose(*args, asset_id=asset_id, pipeline=self, **kwargs) - def noise_removal(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> NoiseRemoval: + def image_label_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageLabelDetection: """ - Noise Removal is a process that involves identifying and eliminating unwanted - random variations or disturbances from an audio signal to enhance the clarity - and quality of the underlying information. + Identifies objects, themes, or topics within images, useful for image +categorization, search, and recommendation systems. """ - return NoiseRemoval(*args, asset_id=asset_id, pipeline=self, **kwargs) + return ImageLabelDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) - def audio_reconstruction(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioReconstruction: + def speaker_diarization_video(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeakerDiarizationVideo: """ - Audio Reconstruction is the process of restoring or recreating audio signals - from incomplete, damaged, or degraded recordings to achieve a high-quality, - accurate representation of the original sound. + Segments a video based on different speakers, identifying when each individual +speaks. Useful for transcriptions and understanding multi-person conversations. """ - return AudioReconstruction(*args, asset_id=asset_id, pipeline=self, **kwargs) + return SpeakerDiarizationVideo(*args, asset_id=asset_id, pipeline=self, **kwargs) - def voice_cloning(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VoiceCloning: + def audio_transcript_improvement(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioTranscriptImprovement: """ - Voice cloning is a technology that uses artificial intelligence to create a - digital replica of a person's voice, allowing for the generation of speech that - mimics the tone, pitch, and speaking style of the original speaker. + Refines and corrects transcriptions generated from audio data, improving +readability and accuracy. """ - return VoiceCloning(*args, asset_id=asset_id, pipeline=self, **kwargs) + return AudioTranscriptImprovement(*args, asset_id=asset_id, pipeline=self, **kwargs) - def diacritization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Diacritization: + def dialect_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> DialectDetection: """ - Diacritization is the process of adding diacritical marks to letters in a text - to indicate pronunciation, stress, tone, or meaning, often used in languages - such as Arabic, Hebrew, and Vietnamese to provide clarity and accuracy in - written communication. + Identifies specific dialects within a language, aiding in localized content +creation or user experience personalization. """ - return Diacritization(*args, asset_id=asset_id, pipeline=self, **kwargs) + return DialectDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) - def audio_emotion_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioEmotionDetection: + def sentiment_analysis(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SentimentAnalysis: """ - Audio Emotion Detection is a technology that analyzes vocal characteristics and - patterns in audio recordings to identify and classify the emotional state of - the speaker. 
+ Determines the sentiment or emotion (e.g., positive, negative, neutral) of a +piece of text, aiding in understanding user feedback or market sentiment. """ - return AudioEmotionDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) + return SentimentAnalysis(*args, asset_id=asset_id, pipeline=self, **kwargs) - def text_summarization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextSummarization: + def speech_embedding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechEmbedding: """ - Text summarization is the process of condensing a large body of text into a - shorter version, capturing the main points and essential information while - maintaining coherence and meaning. + Transforms spoken content into a fixed-size vector in a high-dimensional space +that captures the content's essence. Facilitates tasks like speech recognition +and speaker verification. """ - return TextSummarization(*args, asset_id=asset_id, pipeline=self, **kwargs) + return SpeechEmbedding(*args, asset_id=asset_id, pipeline=self, **kwargs) - def entity_linking(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> EntityLinking: + def text_generation_metric_default(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextGenerationMetricDefault: """ - Entity Linking is the process of identifying and connecting mentions of - entities within a text to their corresponding entries in a structured knowledge - base, thereby enabling the disambiguation of terms and enhancing the - understanding of the text's context. + The "Text Generation Metric Default" function provides a standard set of +evaluation metrics for assessing the quality and performance of text generation +models. """ - return EntityLinking(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TextGenerationMetricDefault(*args, asset_id=asset_id, pipeline=self, **kwargs) - def text_generation_metric(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextGenerationMetric: + def audio_generation_metric(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioGenerationMetric: """ - A Text Generation Metric is a quantitative measure used to evaluate the quality - and effectiveness of text produced by natural language processing models, often - assessing aspects such as coherence, relevance, fluency, and adherence to given - prompts or instructions. + The Audio Generation Metric is a quantitative measure used to evaluate the +quality, accuracy, and overall performance of audio generated by artificial +intelligence systems, often considering factors such as fidelity, +intelligibility, and similarity to human-produced audio. """ - return TextGenerationMetric(*args, asset_id=asset_id, pipeline=self, **kwargs) + return AudioGenerationMetric(*args, asset_id=asset_id, pipeline=self, **kwargs) - def split_on_linebreak(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SplitOnLinebreak: + def audio_language_identification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioLanguageIdentification: """ - The "Split On Linebreak" function divides a given string into a list of - substrings, using linebreaks (newline characters) as the points of separation. + Audio Language Identification is a process that involves analyzing an audio +recording to determine the language being spoken. 
""" - return SplitOnLinebreak(*args, asset_id=asset_id, pipeline=self, **kwargs) + return AudioLanguageIdentification(*args, asset_id=asset_id, pipeline=self, **kwargs) - def sentiment_analysis(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SentimentAnalysis: + def video_label_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoLabelDetection: """ - Sentiment Analysis is a natural language processing technique used to determine - and classify the emotional tone or subjective information expressed in a piece - of text, such as identifying whether the sentiment is positive, negative, or - neutral. + Identifies and tags objects, scenes, or activities within a video. Useful for +content indexing and recommendation systems. """ - return SentimentAnalysis(*args, asset_id=asset_id, pipeline=self, **kwargs) + return VideoLabelDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) - def keyword_spotting(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> KeywordSpotting: + def topic_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TopicClassification: """ - Keyword Spotting is a function that enables the detection and identification of - specific words or phrases within a stream of audio, often used in voice- - activated systems to trigger actions or commands based on recognized keywords. + Assigns categories or topics to a piece of text based on its content, +facilitating content organization and retrieval. """ - return KeywordSpotting(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TopicClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) - def text_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextClassification: + def referenceless_text_generation_metric_default(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ReferencelessTextGenerationMetricDefault: """ - Text Classification is a natural language processing task that involves - categorizing text into predefined labels or classes based on its content, - enabling automated organization, filtering, and analysis of large volumes of - textual data. + The Referenceless Text Generation Metric Default is a function designed to +evaluate the quality of generated text without relying on reference texts for +comparison. """ - return TextClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) + return ReferencelessTextGenerationMetricDefault(*args, asset_id=asset_id, pipeline=self, **kwargs) - def other__multipurpose_(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> OtherMultipurpose: + def image_content_moderation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageContentModeration: """ - The "Other (Multipurpose)" function serves as a versatile category designed to - accommodate a wide range of tasks and activities that do not fit neatly into - predefined classifications, offering flexibility and adaptability for various - needs. + Detects and filters out inappropriate or harmful images, essential for +platforms with user-generated visual content. 
""" - return OtherMultipurpose(*args, asset_id=asset_id, pipeline=self, **kwargs) + return ImageContentModeration(*args, asset_id=asset_id, pipeline=self, **kwargs) - def speech_synthesis(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechSynthesis: + def asr_age_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AsrAgeClassification: """ - Speech synthesis is the artificial production of human speech, typically - achieved through software or hardware systems that convert text into spoken - words, enabling machines to communicate verbally with users. + The ASR Age Classification function is designed to analyze audio recordings of +speech to determine the speaker's age group by leveraging automatic speech +recognition (ASR) technology and machine learning algorithms. """ - return SpeechSynthesis(*args, asset_id=asset_id, pipeline=self, **kwargs) + return AsrAgeClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) - def audio_intent_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioIntentDetection: + def asr_gender_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AsrGenderClassification: """ - Audio Intent Detection is a process that involves analyzing audio signals to - identify and interpret the underlying intentions or purposes behind spoken - words, enabling systems to understand and respond appropriately to human - speech. + The ASR Gender Classification function analyzes audio recordings to determine +and classify the speaker's gender based on their voice characteristics. """ - return AudioIntentDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) + return AsrGenderClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) - def video_label_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoLabelDetection: + def base_model(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> BaseModel: """ - Video Label Detection is a function that automatically identifies and tags - various objects, scenes, activities, and other relevant elements within a - video, providing descriptive labels that enhance searchability and content - organization. + The Base-Model function serves as a foundational framework designed to provide +essential features and capabilities upon which more specialized or advanced +models can be built and customized. """ - return VideoLabelDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) + return BaseModel(*args, asset_id=asset_id, pipeline=self, **kwargs) - def asr_quality_estimation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AsrQualityEstimation: + def language_identification_audio(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> LanguageIdentificationAudio: """ - ASR Quality Estimation is a process that evaluates the accuracy and reliability - of automatic speech recognition systems by analyzing their performance in - transcribing spoken language into text. + The Language Identification Audio function analyzes audio input to determine +and identify the language being spoken. 
""" - return AsrQualityEstimation(*args, asset_id=asset_id, pipeline=self, **kwargs) + return LanguageIdentificationAudio(*args, asset_id=asset_id, pipeline=self, **kwargs) - def audio_transcript_analysis(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioTranscriptAnalysis: + def multi_class_image_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> MultiClassImageClassification: """ - Audio Transcript Analysis is a process that involves converting spoken language - from audio recordings into written text, followed by examining and interpreting - the transcribed content to extract meaningful insights, identify patterns, and - derive actionable information. + Multi Class Image Classification is a machine learning task where an algorithm +is trained to categorize images into one of several predefined classes or +categories based on their visual content. """ - return AudioTranscriptAnalysis(*args, asset_id=asset_id, pipeline=self, **kwargs) + return MultiClassImageClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) - def search(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Search: + def semantic_segmentation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SemanticSegmentation: """ - The "Search" function allows users to input keywords or phrases to quickly - locate specific information, files, or content within a database, website, or - application. + Semantic segmentation is a computer vision process that involves classifying +each pixel in an image into a predefined category, effectively partitioning the +image into meaningful segments based on the objects or regions they represent. """ - return Search(*args, asset_id=asset_id, pipeline=self, **kwargs) + return SemanticSegmentation(*args, asset_id=asset_id, pipeline=self, **kwargs) - def video_forced_alignment(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoForcedAlignment: + def instance_segmentation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> InstanceSegmentation: """ - Video Forced Alignment is a process that synchronizes video footage with - corresponding audio tracks by precisely aligning the visual and auditory - elements, ensuring that the movements of speakers' lips match the spoken words. + Instance segmentation is a computer vision task that involves detecting and +delineating each distinct object within an image, assigning a unique label and +precise boundary to every individual instance of objects, even if they belong +to the same category. """ - return VideoForcedAlignment(*args, asset_id=asset_id, pipeline=self, **kwargs) + return InstanceSegmentation(*args, asset_id=asset_id, pipeline=self, **kwargs) - def viseme_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VisemeGeneration: + def emotion_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> EmotionDetection: """ - Viseme Generation is the process of creating visual representations of - phonemes, which are the distinct units of sound in speech, to synchronize lip - movements with spoken words in animations or virtual avatars. + Identifies human emotions from text or audio, enhancing user experience in +chatbots or customer feedback analysis. 
""" - return VisemeGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) + return EmotionDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) - def topic_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TopicClassification: + def text_spam_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextSpamDetection: """ - Topic Classification is a natural language processing function that categorizes - text into predefined topics or subjects based on its content, enabling - efficient organization and retrieval of information. + Identifies and filters out unwanted or irrelevant text content, ideal for +moderating user-generated content or ensuring quality in communication +platforms. """ - return TopicClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TextSpamDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) - def offensive_language_identification( - self, asset_id: Union[str, asset.Asset], *args, **kwargs - ) -> OffensiveLanguageIdentification: + def text_denormalization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextDenormalization: """ - Offensive Language Identification is a function that analyzes text to detect - and flag language that is abusive, harmful, or inappropriate, helping to - maintain a respectful and safe communication environment. + Converts standardized or normalized text into its original, often more +readable, form. Useful in natural language generation tasks. """ - return OffensiveLanguageIdentification(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TextDenormalization(*args, asset_id=asset_id, pipeline=self, **kwargs) - def speech_translation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechTranslation: + def referenceless_audio_generation_metric(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ReferencelessAudioGenerationMetric: """ - Speech Translation is a technology that converts spoken language in real-time - from one language to another, enabling seamless communication between speakers - of different languages. + The Referenceless Audio Generation Metric is a tool designed to evaluate the +quality of generated audio content without the need for a reference or original +audio sample for comparison. """ - return SpeechTranslation(*args, asset_id=asset_id, pipeline=self, **kwargs) + return ReferencelessAudioGenerationMetric(*args, asset_id=asset_id, pipeline=self, **kwargs) - def speaker_diarization_audio(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeakerDiarizationAudio: + def audio_forced_alignment(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioForcedAlignment: """ - Speaker Diarization Audio is a process that involves segmenting an audio - recording into distinct sections, each corresponding to a different speaker, in - order to identify and differentiate between multiple speakers within the same - audio stream. + Synchronizes phonetic and phonological text with the corresponding segments in +an audio file. Useful in linguistic research and detailed transcription tasks. 
""" - return SpeakerDiarizationAudio(*args, asset_id=asset_id, pipeline=self, **kwargs) + return AudioForcedAlignment(*args, asset_id=asset_id, pipeline=self, **kwargs) - def audio_transcript_improvement(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioTranscriptImprovement: + def video_forced_alignment(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoForcedAlignment: """ - Audio Transcript Improvement is a function that enhances the accuracy and - clarity of transcribed audio recordings by correcting errors, refining - language, and ensuring the text faithfully represents the original spoken - content. + Aligns the transcription of spoken content in a video with its corresponding +timecodes, facilitating subtitle creation. """ - return AudioTranscriptImprovement(*args, asset_id=asset_id, pipeline=self, **kwargs) + return VideoForcedAlignment(*args, asset_id=asset_id, pipeline=self, **kwargs) - def speech_non_speech_classification( - self, asset_id: Union[str, asset.Asset], *args, **kwargs - ) -> SpeechNonSpeechClassification: + def classification_metric(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ClassificationMetric: """ - The function "Speech or Non-Speech Classification" is designed to analyze audio - input and determine whether the sound is human speech or non-speech noise, - enabling applications such as voice recognition systems to filter out - irrelevant background sounds. + A Classification Metric is a quantitative measure used to evaluate the quality +and effectiveness of classification models. """ - return SpeechNonSpeechClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) + return ClassificationMetric(*args, asset_id=asset_id, pipeline=self, **kwargs) - def text_denormalization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextDenormalization: + def auto_mask_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AutoMaskGeneration: """ - Text Denormalization is the process of converting abbreviated, contracted, or - otherwise simplified text into its full, standard form, often to improve - readability and ensure consistency in natural language processing tasks. + Auto-mask generation refers to the automated process of creating masks in image +processing or computer vision, typically for segmentation tasks. A mask is a +binary or multi-class image that labels different parts of an image, usually +separating the foreground (objects of interest) from the background, or +identifying specific object classes in an image. """ - return TextDenormalization(*args, asset_id=asset_id, pipeline=self, **kwargs) + return AutoMaskGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) - def image_content_moderation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageContentModeration: + def text_embedding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextEmbedding: """ - Image Content Moderation is a process that involves analyzing and filtering - images to detect and manage inappropriate, harmful, or sensitive content, - ensuring compliance with community guidelines and legal standards. + Text embedding is a process that converts text into numerical vectors, +capturing the semantic meaning and contextual relationships of words or +phrases, enabling machines to understand and analyze natural language more +effectively. 
""" - return ImageContentModeration(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TextEmbedding(*args, asset_id=asset_id, pipeline=self, **kwargs) - def referenceless_text_generation_metric_default( - self, asset_id: Union[str, asset.Asset], *args, **kwargs - ) -> ReferencelessTextGenerationMetricDefault: + def fact_checking(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> FactChecking: """ - The Referenceless Text Generation Metric Default is a function designed to - evaluate the quality of generated text without relying on reference texts for - comparison. + Fact Checking is the process of verifying the accuracy and truthfulness of +information, statements, or claims by cross-referencing with reliable sources +and evidence. """ - return ReferencelessTextGenerationMetricDefault(*args, asset_id=asset_id, pipeline=self, **kwargs) + return FactChecking(*args, asset_id=asset_id, pipeline=self, **kwargs) - def named_entity_recognition(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> NamedEntityRecognition: + def text_to_audio(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextToAudio: """ - Named Entity Recognition (NER) is a natural language processing task that - involves identifying and classifying proper nouns in text into predefined - categories such as names of people, organizations, locations, dates, and other - entities. + The Text to Audio function converts written text into spoken words, allowing +users to listen to the content instead of reading it. """ - return NamedEntityRecognition(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TextToAudio(*args, asset_id=asset_id, pipeline=self, **kwargs) - def text_content_moderation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextContentModeration: + def fill_text_mask(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> FillTextMask: """ - Text Content Moderation is the process of reviewing, filtering, and managing - user-generated content to ensure it adheres to community guidelines, legal - standards, and platform policies, thereby maintaining a safe and respectful - online environment. + Completes missing parts of a text based on the context, ideal for content +generation or data augmentation tasks. """ - return TextContentModeration(*args, asset_id=asset_id, pipeline=self, **kwargs) + return FillTextMask(*args, asset_id=asset_id, pipeline=self, **kwargs) - def speaker_diarization_video(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeakerDiarizationVideo: + def voice_cloning(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VoiceCloning: """ - The Speaker Diarization Video function identifies and segments different - speakers in a video, attributing portions of the audio to individual speakers - to facilitate analysis and understanding of multi-speaker conversations. + Replicates a person's voice based on a sample, allowing for the generation of +speech in that person's tone and style. Used cautiously due to ethical +considerations. 
""" - return SpeakerDiarizationVideo(*args, asset_id=asset_id, pipeline=self, **kwargs) + return VoiceCloning(*args, asset_id=asset_id, pipeline=self, **kwargs) - def split_on_silence(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SplitOnSilence: + def diacritization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Diacritization: """ - The "Split On Silence" function divides an audio recording into separate - segments based on periods of silence, allowing for easier editing and analysis - of individual sections. + Adds diacritical marks to text, essential for languages where meaning can +change based on diacritics. """ - return SplitOnSilence(*args, asset_id=asset_id, pipeline=self, **kwargs) + return Diacritization(*args, asset_id=asset_id, pipeline=self, **kwargs) - def emotion_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> EmotionDetection: + def speech_translation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechTranslation: """ - Emotion Detection is a process that involves analyzing text to identify and - categorize the emotional states or sentiments expressed by individuals, such as - happiness, sadness, anger, or fear. + Speech Translation is a technology that converts spoken language in real-time +from one language to another, enabling seamless communication between speakers +of different languages. """ - return EmotionDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) + return SpeechTranslation(*args, asset_id=asset_id, pipeline=self, **kwargs) - def text_spam_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextSpamDetection: + def speech_synthesis(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechSynthesis: """ - Text Spam Detection is a process that involves analyzing and identifying - unsolicited or irrelevant messages within text communications, typically using - algorithms and machine learning techniques to filter out spam and ensure the - integrity of the communication platform. + Generates human-like speech from written text. Ideal for text-to-speech +applications, audiobooks, and voice assistants. """ - return TextSpamDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) + return SpeechSynthesis(*args, asset_id=asset_id, pipeline=self, **kwargs) - def translation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Translation: + def text_content_moderation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextContentModeration: """ - Translation is the process of converting text from one language into an - equivalent text in another language, preserving the original meaning and - context. + Scans and identifies potentially harmful, offensive, or inappropriate textual +content, ensuring safer user environments. """ - return Translation(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TextContentModeration(*args, asset_id=asset_id, pipeline=self, **kwargs) - def voice_activity_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VoiceActivityDetection: + def subtitling_translation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SubtitlingTranslation: """ - Voice Activity Detection (VAD) is a technology that identifies the presence or - absence of human speech within an audio signal, enabling systems to distinguish - between spoken words and background noise. + Converts the text of subtitles from one language to another, ensuring context +and cultural nuances are maintained. 
Essential for global content distribution. """ - return VoiceActivityDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) + return SubtitlingTranslation(*args, asset_id=asset_id, pipeline=self, **kwargs) - def speech_embedding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechEmbedding: + def audio_transcript_analysis(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioTranscriptAnalysis: """ - Speech Embedding is a process that transforms spoken language into a fixed- - dimensional vector representation, capturing essential features and - characteristics of the speech for tasks such as recognition, classification, - and analysis. + Analyzes transcribed audio data for insights, patterns, or specific information +extraction. """ - return SpeechEmbedding(*args, asset_id=asset_id, pipeline=self, **kwargs) + return AudioTranscriptAnalysis(*args, asset_id=asset_id, pipeline=self, **kwargs) - def subtitling_translation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SubtitlingTranslation: + def text_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextGeneration: """ - Subtitling Translation is the process of converting spoken dialogue from one - language into written text in another language, which is then displayed on- - screen to aid viewers in understanding the content. + Creates coherent and contextually relevant textual content based on prompts or +certain parameters. Useful for chatbots, content creation, and data +augmentation. """ - return SubtitlingTranslation(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TextGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) - def text_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextGeneration: + def text_normalization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextNormalization: """ - Text Generation is a process in which artificial intelligence models, such as - neural networks, produce coherent and contextually relevant text based on a - given input or prompt, often mimicking human writing styles and patterns. + Converts unstructured or non-standard textual data into a more readable and +uniform format, dealing with abbreviations, numerals, and other non-standard +words. """ - return TextGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TextNormalization(*args, asset_id=asset_id, pipeline=self, **kwargs) - def video_understanding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoUnderstanding: + def voice_activity_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VoiceActivityDetection: """ - Video Understanding is the process of analyzing and interpreting video content - to extract meaningful information, such as identifying objects, actions, - events, and contextual relationships within the footage. + Determines when a person is speaking in an audio clip. It's an essential +preprocessing step for other audio-related tasks. 
""" - return VideoUnderstanding(*args, asset_id=asset_id, pipeline=self, **kwargs) + return VoiceActivityDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) - def text_to_video_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextToVideoGeneration: + def video_understanding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoUnderstanding: """ - Text To Video Generation is a process that converts written descriptions or - scripts into dynamic, visual video content using advanced algorithms and - artificial intelligence. + Video Understanding is the process of analyzing and interpreting video content +to extract meaningful information, such as identifying objects, actions, +events, and contextual relationships within the footage. """ - return TextToVideoGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) + return VideoUnderstanding(*args, asset_id=asset_id, pipeline=self, **kwargs) - def text_normalization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextNormalization: + def translation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Translation: """ - Text normalization is the process of transforming text into a standard, - consistent format by correcting spelling errors, converting all characters to a - uniform case, removing punctuation, and expanding abbreviations to improve the - text's readability and usability for further processing or analysis. + Converts text from one language to another while maintaining the original +message's essence and context. Crucial for global communication. """ - return TextNormalization(*args, asset_id=asset_id, pipeline=self, **kwargs) + return Translation(*args, asset_id=asset_id, pipeline=self, **kwargs) def speech_recognition(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechRecognition: """ - Speech recognition is a technology that enables a computer or device to - identify and process spoken language, converting it into text. + Converts spoken language into written text. Useful for transcription services, +voice assistants, and applications requiring voice-to-text capabilities. """ return SpeechRecognition(*args, asset_id=asset_id, pipeline=self, **kwargs) def subtitling(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Subtitling: """ - Subtitling is the process of displaying written text on a screen to represent - the spoken dialogue, narration, or other audio elements in a video, typically - to aid viewers who are deaf or hard of hearing, or to provide translations for - audiences who speak different languages. + Generates accurate subtitles for videos, enhancing accessibility for diverse +audiences. """ return Subtitling(*args, asset_id=asset_id, pipeline=self, **kwargs) - def classification_metric(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ClassificationMetric: - """ - A Classification Metric is a quantitative measure used to evaluate the quality - and effectiveness of classification models. - """ - return ClassificationMetric(*args, asset_id=asset_id, pipeline=self, **kwargs) - - def text_to_image_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextToImageGeneration: - """ - Text To Image Generation is a process where a system creates visual images - based on descriptive text input, translating written language into - corresponding graphical representations. 
- """ - return TextToImageGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) From a7b5a46a9198ef6c8c515ceae463165eb435f991 Mon Sep 17 00:00:00 2001 From: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Date: Fri, 20 Dec 2024 20:34:15 -0300 Subject: [PATCH 090/105] Add restriction when setting Utilities function in model tool (#346) --- aixplain/modules/agent/tool/model_tool.py | 3 +++ tests/unit/agent_test.py | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/aixplain/modules/agent/tool/model_tool.py b/aixplain/modules/agent/tool/model_tool.py index 628377a3..0b1c3179 100644 --- a/aixplain/modules/agent/tool/model_tool.py +++ b/aixplain/modules/agent/tool/model_tool.py @@ -60,6 +60,9 @@ def __init__( if function is not None: if isinstance(function, str): function = Function(function) + assert ( + function is None or function is not Function.UTILITIES or model is not None + ), "Agent Creation Error: Utility function must be used with an associated model." try: if isinstance(supplier, dict): diff --git a/tests/unit/agent_test.py b/tests/unit/agent_test.py index ce1eac63..b96c1531 100644 --- a/tests/unit/agent_test.py +++ b/tests/unit/agent_test.py @@ -8,6 +8,7 @@ from aixplain.modules.agent import PipelineTool, ModelTool from aixplain.modules.agent.utils import process_variables from urllib.parse import urljoin +from aixplain.enums.function import Function def test_fail_no_data_query(): @@ -265,3 +266,9 @@ def test_process_variables(): "input": "Hello, how are you?", "target_language": "English", } + + +def test_fail_utilities_without_model(): + with pytest.raises(Exception) as exc_info: + AgentFactory.create(name="Test", tools=[ModelTool(function=Function.UTILITIES)], llm_id="6646261c6eb563165658bbb1") + assert str(exc_info.value) == "Agent Creation Error: Utility function must be used with an associated model." From 9e787b9f62cfbaab99b6f8002d9ac5d17210cfd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20G=C3=BCnd=C3=BCz?= Date: Mon, 23 Dec 2024 13:01:47 +0300 Subject: [PATCH 091/105] ENG-1245: transition from update to save (#345) * save methods created as wrapper of update method * warning added andunit tests started * add .ipynb to gitignore * removed the unnecassary comments --- .gitignore | 3 ++ aixplain/modules/agent/__init__.py | 18 ++++++- aixplain/modules/model/utility_model.py | 17 +++++++ aixplain/modules/pipeline/asset.py | 37 ++++++++++---- aixplain/modules/team_agent/__init__.py | 15 ++++++ tests/unit/agent_test.py | 68 ++++++++++++++++++++++++- tests/unit/utility_test.py | 41 ++++++++++++++- 7 files changed, 185 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index 3ec74da5..304f04cb 100644 --- a/.gitignore +++ b/.gitignore @@ -2,9 +2,12 @@ __pycache__/ *.py[cod] *$py.class +.aixplain_cache/ +setup_env_ahmet.sh # C extensions *.so +*.ipynb # Distribution / packaging .Python diff --git a/aixplain/modules/agent/__init__.py b/aixplain/modules/agent/__init__.py index c436b84a..d6d6d77d 100644 --- a/aixplain/modules/agent/__init__.py +++ b/aixplain/modules/agent/__init__.py @@ -305,9 +305,20 @@ def delete(self) -> None: message = f"Agent Deletion Error (HTTP {r.status_code}): There was an error in deleting the agent." 
logging.error(message) raise Exception(f"{message}") - + def update(self) -> None: """Update agent.""" + import warnings + import inspect + # Get the current call stack + stack = inspect.stack() + if len(stack) > 2 and stack[1].function != 'save': + warnings.warn( + "update() is deprecated and will be removed in a future version. " + "Please use save() instead.", + DeprecationWarning, + stacklevel=2 + ) from aixplain.factories.agent_factory.utils import build_agent self.validate() @@ -330,6 +341,11 @@ def update(self) -> None: error_msg = f"Agent Update Error (HTTP {r.status_code}): {resp}" raise Exception(error_msg) + + def save(self) -> None: + """Save the Agent.""" + self.update() + def deploy(self) -> None: assert self.status == AssetStatus.DRAFT, "Agent must be in draft status to be deployed." assert self.status != AssetStatus.ONBOARDED, "Agent is already deployed." diff --git a/aixplain/modules/model/utility_model.py b/aixplain/modules/model/utility_model.py index f3f597ef..b5748ca7 100644 --- a/aixplain/modules/model/utility_model.py +++ b/aixplain/modules/model/utility_model.py @@ -138,6 +138,18 @@ def to_dict(self): } def update(self): + """Update the Utility Model.""" + import warnings + import inspect + # Get the current call stack + stack = inspect.stack() + if len(stack) > 2 and stack[1].function != 'save': + warnings.warn( + "update() is deprecated and will be removed in a future version. " + "Please use save() instead.", + DeprecationWarning, + stacklevel=2 + ) self.validate() url = urljoin(self.backend_url, f"sdk/utilities/{self.id}") headers = {"x-api-key": f"{self.api_key}", "Content-Type": "application/json"} @@ -156,7 +168,12 @@ def update(self): logging.error(message) raise Exception(f"{message}") + def save(self): + """Save the Utility Model.""" + self.update() + def delete(self): + """Delete the Utility Model.""" url = urljoin(self.backend_url, f"sdk/utilities/{self.id}") headers = {"x-api-key": f"{self.api_key}", "Content-Type": "application/json"} try: diff --git a/aixplain/modules/pipeline/asset.py b/aixplain/modules/pipeline/asset.py index 308f19b3..cc2bb8c6 100644 --- a/aixplain/modules/pipeline/asset.py +++ b/aixplain/modules/pipeline/asset.py @@ -384,6 +384,17 @@ def update( Raises: Exception: Make sure the pipeline to be save is in a JSON file. """ + import warnings + import inspect + # Get the current call stack + stack = inspect.stack() + if len(stack) > 2 and stack[1].function != 'save': + warnings.warn( + "update() is deprecated and will be removed in a future version. " + "Please use save() instead.", + DeprecationWarning, + stacklevel=2 + ) try: if isinstance(pipeline, str) is True: _, ext = os.path.splitext(pipeline) @@ -437,10 +448,11 @@ def delete(self) -> None: logging.error(message) raise Exception(f"{message}") - def save(self, save_as_asset: bool = False, api_key: Optional[Text] = None): - """Save Pipeline + def save(self, pipeline: Optional[Union[Text, Dict]] = None, save_as_asset: bool = False, api_key: Optional[Text] = None): + """Update and Save Pipeline Args: + pipeline (Optional[Union[Text, Dict]]): Pipeline as a Python dictionary or in a JSON file save_as_asset (bool, optional): Save as asset (True) or draft (False). Defaults to False. api_key (Optional[Text], optional): Team API Key to create the Pipeline. Defaults to None. @@ -448,7 +460,17 @@ def save(self, save_as_asset: bool = False, api_key: Optional[Text] = None): Exception: Make sure the pipeline to be save is in a JSON file. 
""" try: - pipeline = self.to_dict() + if pipeline is None: + pipeline = self.to_dict() + else: + if isinstance(pipeline, str) is True: + _, ext = os.path.splitext(pipeline) + assert ( + os.path.exists(pipeline) and ext == ".json" + ), "Pipeline Update Error: Make sure the pipeline to be saved is in a JSON file." + with open(pipeline) as f: + pipeline = json.load(f) + self.update(pipeline=pipeline, save_as_asset=save_as_asset, api_key=api_key) for i, node in enumerate(pipeline["nodes"]): if "functionType" in node: @@ -463,19 +485,14 @@ def save(self, save_as_asset: bool = False, api_key: Optional[Text] = None): "architecture": pipeline, } - if self.id != "": - method = "put" - url = urljoin(config.BACKEND_URL, f"sdk/pipelines/{self.id}") - else: - method = "post" - url = urljoin(config.BACKEND_URL, "sdk/pipelines") + url = urljoin(config.BACKEND_URL, "sdk/pipelines") api_key = api_key if api_key is not None else config.TEAM_API_KEY headers = { "Authorization": f"Token {api_key}", "Content-Type": "application/json", } logging.info(f"Start service for Save Pipeline - {url} - {headers} - {json.dumps(payload)}") - r = _request_with_retry(method, url, headers=headers, json=payload) + r = _request_with_retry("post", url, headers=headers, json=payload) response = r.json() self.id = response["id"] logging.info(f"Pipeline {response['id']} Saved.") diff --git a/aixplain/modules/team_agent/__init__.py b/aixplain/modules/team_agent/__init__.py index f92b437d..80729d80 100644 --- a/aixplain/modules/team_agent/__init__.py +++ b/aixplain/modules/team_agent/__init__.py @@ -310,6 +310,17 @@ def validate(self) -> None: def update(self) -> None: """Update the Team Agent.""" + import warnings + import inspect + # Get the current call stack + stack = inspect.stack() + if len(stack) > 2 and stack[1].function != 'save': + warnings.warn( + "update() is deprecated and will be removed in a future version. " + "Please use save() instead.", + DeprecationWarning, + stacklevel=2 + ) from aixplain.factories.team_agent_factory.utils import build_team_agent self.validate() @@ -332,6 +343,10 @@ def update(self) -> None: error_msg = f"Team Agent Update Error (HTTP {r.status_code}): {resp}" raise Exception(error_msg) + def save(self) -> None: + """Save the Team Agent.""" + self.update() + def deploy(self) -> None: """Deploy the Team Agent.""" assert self.status == AssetStatus.DRAFT, "Team Agent Deployment Error: Team Agent must be in draft status." diff --git a/tests/unit/agent_test.py b/tests/unit/agent_test.py index b96c1531..cf217919 100644 --- a/tests/unit/agent_test.py +++ b/tests/unit/agent_test.py @@ -8,9 +8,11 @@ from aixplain.modules.agent import PipelineTool, ModelTool from aixplain.modules.agent.utils import process_variables from urllib.parse import urljoin +import warnings from aixplain.enums.function import Function + def test_fail_no_data_query(): agent = Agent("123", "Test Agent", "Sample Description") with pytest.raises(Exception) as exc_info: @@ -222,7 +224,9 @@ def test_update_success(): } mock.get(url, headers=headers, json=model_ref_response) - agent.update() + # Capture warnings + with pytest.warns(DeprecationWarning, match="update\(\) is deprecated and will be removed in a future version. 
Please use save\(\) instead."): + agent.update() assert agent.id == ref_response["id"] assert agent.name == ref_response["name"] @@ -230,6 +234,68 @@ def test_update_success(): assert agent.llm_id == ref_response["llmId"] assert agent.tools[0].function.value == ref_response["assets"][0]["function"] +def test_save_success(): + agent = Agent( + id="123", + name="Test Agent", + description="Test Agent Description", + llm_id="6646261c6eb563165658bbb1", + tools=[AgentFactory.create_model_tool(function="text-generation")], + ) + + with requests_mock.Mocker() as mock: + url = urljoin(config.BACKEND_URL, f"sdk/agents/{agent.id}") + headers = {"x-api-key": config.TEAM_API_KEY, "Content-Type": "application/json"} + ref_response = { + "id": "123", + "name": "Test Agent", + "description": "Test Agent Description", + "teamId": "123", + "version": "1.0", + "status": "onboarded", + "llmId": "6646261c6eb563165658bbb1", + "pricing": {"currency": "USD", "value": 0.0}, + "assets": [ + { + "type": "model", + "supplier": "openai", + "version": "1.0", + "assetId": "6646261c6eb563165658bbb1", + "function": "text-generation", + } + ], + } + mock.put(url, headers=headers, json=ref_response) + + url = urljoin(config.BACKEND_URL, "sdk/models/6646261c6eb563165658bbb1") + model_ref_response = { + "id": "6646261c6eb563165658bbb1", + "name": "Test LLM", + "description": "Test LLM Description", + "function": {"id": "text-generation"}, + "supplier": "openai", + "version": {"id": "1.0"}, + "status": "onboarded", + "pricing": {"currency": "USD", "value": 0.0}, + } + mock.get(url, headers=headers, json=model_ref_response) + + import warnings + # Capture warnings + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") # Trigger all warnings + + # Call the save method + agent.save() + + # Assert no warnings were triggered + assert len(w) == 0, f"Warnings were raised: {[str(warning.message) for warning in w]}" + + assert agent.id == ref_response["id"] + assert agent.name == ref_response["name"] + assert agent.description == ref_response["description"] + assert agent.llm_id == ref_response["llmId"] + assert agent.tools[0].function.value == ref_response["assets"][0]["function"] def test_run_success(): agent = Agent("123", "Test Agent", "Sample Description") diff --git a/tests/unit/utility_test.py b/tests/unit/utility_test.py index 2f3ba6a1..cb45597a 100644 --- a/tests/unit/utility_test.py +++ b/tests/unit/utility_test.py @@ -106,12 +106,49 @@ def test_update_utility_model(): function=Function.UTILITIES, api_key=config.TEAM_API_KEY, ) - utility_model.description = "updated_description" - utility_model.update() + + with pytest.warns(DeprecationWarning, match="update\(\) is deprecated and will be removed in a future version. 
Please use save\(\) instead."): + utility_model.description = "updated_description" + utility_model.update() assert utility_model.id == "123" assert utility_model.description == "updated_description" +def test_save_utility_model(): + with requests_mock.Mocker() as mock: + with patch("aixplain.factories.file_factory.FileFactory.to_link", return_value="def main(originCode: str)"): + with patch("aixplain.factories.file_factory.FileFactory.upload", return_value="def main(originCode: str)"): + with patch( + "aixplain.modules.model.utils.parse_code", + return_value=( + "def main(originCode: str)", + [UtilityModelInput(name="originCode", description="originCode", type=DataType.TEXT)], + "utility_model_test", + ), + ): + mock.put(urljoin(config.BACKEND_URL, "sdk/utilities/123"), json={"id": "123"}) + utility_model = UtilityModel( + id="123", + name="utility_model_test", + description="utility_model_test", + code="def main(originCode: str)", + output_examples="output_description", + inputs=[UtilityModelInput(name="originCode", description="originCode", type=DataType.TEXT)], + function=Function.UTILITIES, + api_key=config.TEAM_API_KEY, + ) + import warnings + # it should not trigger any warning + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") # Trigger all warnings + utility_model.description = "updated_description" + utility_model.save() + + assert len(w) == 0 + + assert utility_model.id == "123" + assert utility_model.description == "updated_description" + def test_delete_utility_model(): with requests_mock.Mocker() as mock: From 6f12c91a0544525853aaa2c5977f9b0b111e2f6f Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Mon, 23 Dec 2024 10:20:12 -0300 Subject: [PATCH 092/105] ENG-1153: Enabling custom python code tool (#326) * Enabling custom python code tool * Refactor code to enable Python Interpreter * Custom Python Code Tool * Create python custom code test * Creating an utility model with an input api key * Add functional test for custom code tool --- aixplain/factories/agent_factory/__init__.py | 18 ++- aixplain/factories/agent_factory/utils.py | 8 +- aixplain/factories/model_factory/__init__.py | 8 +- aixplain/modules/agent/__init__.py | 2 + .../agent/tool/custom_python_code_tool.py | 56 ++++++++ .../agent/tool/python_interpreter_tool.py | 42 ++++++ pyproject.toml | 2 +- .../functional/agent/agent_functional_test.py | 57 +++++++- tests/unit/agent_test.py | 127 +++++++++++------- 9 files changed, 260 insertions(+), 60 deletions(-) create mode 100644 aixplain/modules/agent/tool/custom_python_code_tool.py create mode 100644 aixplain/modules/agent/tool/python_interpreter_tool.py diff --git a/aixplain/factories/agent_factory/__init__.py b/aixplain/factories/agent_factory/__init__.py index 39ae5678..c90dcc05 100644 --- a/aixplain/factories/agent_factory/__init__.py +++ b/aixplain/factories/agent_factory/__init__.py @@ -29,10 +29,12 @@ from aixplain.modules.agent import Agent, Tool from aixplain.modules.agent.tool.model_tool import ModelTool from aixplain.modules.agent.tool.pipeline_tool import PipelineTool +from aixplain.modules.agent.tool.python_interpreter_tool import PythonInterpreterTool +from aixplain.modules.agent.tool.custom_python_code_tool import CustomPythonCodeTool from aixplain.modules.model import Model from aixplain.modules.pipeline import Pipeline from aixplain.utils import config -from typing import Dict, List, Optional, Text, Union +from typing import Callable, Dict, List, Optional, 
Text, Union from aixplain.utils.file_utils import _request_with_retry from urllib.parse import urljoin @@ -88,8 +90,8 @@ def create( agent.validate() response = "Unspecified error" try: - logging.debug(f"Start service for POST Create Agent - {url} - {headers} - {json.dumps(payload)}") - r = _request_with_retry("post", url, headers=headers, json=payload) + logging.debug(f"Start service for POST Create Agent - {url} - {headers} - {json.dumps(agent.to_dict())}") + r = _request_with_retry("post", url, headers=headers, json=agent.to_dict()) response = r.json() except Exception: raise Exception("Agent Onboarding Error: Please contact the administrators.") @@ -136,6 +138,16 @@ def create_pipeline_tool(cls, description: Text, pipeline: Union[Pipeline, Text] """Create a new pipeline tool.""" return PipelineTool(description=description, pipeline=pipeline) + @classmethod + def create_python_interpreter_tool(cls) -> PythonInterpreterTool: + """Create a new python interpreter tool.""" + return PythonInterpreterTool() + + @classmethod + def create_custom_python_code_tool(cls, code: Union[Text, Callable], description: Text = "") -> CustomPythonCodeTool: + """Create a new custom python code tool.""" + return CustomPythonCodeTool(description=description, code=code) + @classmethod def list(cls) -> Dict: """List all agents available in the platform.""" diff --git a/aixplain/factories/agent_factory/utils.py b/aixplain/factories/agent_factory/utils.py index e5e73dc4..54f746d8 100644 --- a/aixplain/factories/agent_factory/utils.py +++ b/aixplain/factories/agent_factory/utils.py @@ -3,7 +3,8 @@ import aixplain.utils.config as config from aixplain.enums import Function, Supplier from aixplain.enums.asset_status import AssetStatus -from aixplain.modules.agent import Agent, ModelTool, PipelineTool +from aixplain.modules.agent import Agent, ModelTool, PipelineTool, PythonInterpreterTool +from aixplain.modules.agent.tool.custom_python_code_tool import CustomPythonCodeTool from typing import Dict, Text from urllib.parse import urljoin @@ -34,6 +35,11 @@ def build_agent(payload: Dict, api_key: Text = config.TEAM_API_KEY) -> Agent: ) elif tool["type"] == "pipeline": tool = PipelineTool(description=tool["description"], pipeline=tool["assetId"]) + elif tool["type"] == "utility": + if tool.get("utilityCode", None) is not None: + tool = CustomPythonCodeTool(description=tool["description"], code=tool["utilityCode"]) + else: + tool = PythonInterpreterTool() else: raise Exception("Agent Creation Error: Tool type not supported.") tools.append(tool) diff --git a/aixplain/factories/model_factory/__init__.py b/aixplain/factories/model_factory/__init__.py index 4a592ab9..bbb93cfa 100644 --- a/aixplain/factories/model_factory/__init__.py +++ b/aixplain/factories/model_factory/__init__.py @@ -48,6 +48,7 @@ def create_utility_model( inputs: List[UtilityModelInput] = [], description: Optional[Text] = None, output_examples: Text = "", + api_key: Optional[Text] = None, ) -> UtilityModel: """Create a utility model @@ -57,10 +58,11 @@ def create_utility_model( description (Text, optional): description of the model inputs (List[UtilityModelInput], optional): inputs of the model output_examples (Text, optional): output examples - + api_key (Text, optional): Team API key. Defaults to None. 
Returns: UtilityModel: created utility model """ + api_key = config.TEAM_API_KEY if api_key is None else api_key utility_model = UtilityModel( id="", name=name, @@ -68,13 +70,13 @@ def create_utility_model( inputs=inputs, code=code, function=Function.UTILITIES, - api_key=config.TEAM_API_KEY, + api_key=api_key, output_examples=output_examples, ) utility_model.validate() payload = utility_model.to_dict() url = urljoin(cls.backend_url, "sdk/utilities") - headers = {"x-api-key": f"{config.TEAM_API_KEY}", "Content-Type": "application/json"} + headers = {"x-api-key": f"{api_key}", "Content-Type": "application/json"} try: logging.info(f"Start service for POST Utility Model - {url} - {headers} - {payload}") r = _request_with_retry("post", url, headers=headers, json=payload) diff --git a/aixplain/modules/agent/__init__.py b/aixplain/modules/agent/__init__.py index d6d6d77d..581c7e88 100644 --- a/aixplain/modules/agent/__init__.py +++ b/aixplain/modules/agent/__init__.py @@ -36,6 +36,8 @@ from aixplain.modules.agent.tool import Tool from aixplain.modules.agent.tool.model_tool import ModelTool from aixplain.modules.agent.tool.pipeline_tool import PipelineTool +from aixplain.modules.agent.tool.python_interpreter_tool import PythonInterpreterTool +from aixplain.modules.agent.tool.custom_python_code_tool import CustomPythonCodeTool from aixplain.modules.agent.utils import process_variables from typing import Dict, List, Text, Optional, Union from urllib.parse import urljoin diff --git a/aixplain/modules/agent/tool/custom_python_code_tool.py b/aixplain/modules/agent/tool/custom_python_code_tool.py new file mode 100644 index 00000000..9f8b69db --- /dev/null +++ b/aixplain/modules/agent/tool/custom_python_code_tool.py @@ -0,0 +1,56 @@ +__author__ = "aiXplain" + +""" +Copyright 2024 The aiXplain SDK authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ +Author: Lucas Pavanelli and Thiago Castro Ferreira +Date: May 16th 2024 +Description: + Agentification Class +""" + +from typing import Text, Union, Callable +from aixplain.modules.agent.tool import Tool + + +class CustomPythonCodeTool(Tool): + """Custom Python Code Tool""" + + def __init__(self, code: Union[Text, Callable], description: Text = "", **additional_info) -> None: + """Custom Python Code Tool""" + super().__init__(name="Custom Python Code", description=description, **additional_info) + self.code = code + + def to_dict(self): + return { + "description": self.description, + "type": "utility", + "utility": "custom_python_code", + "utilityCode": self.code, + } + + def validate(self): + from aixplain.modules.model.utils import parse_code + + self.code, _, description = parse_code(self.code) + + assert ( + description is not None or self.description is not None + ), "Custom Python Code Tool Error: Tool description is required" + if self.description is None or self.description.strip() == "": + self.description = description + assert self.name and self.name.strip() != "", "Name is required" + assert self.description and self.description.strip() != "", "Description is required" + assert self.code and self.code.strip() != "", "Code is required" diff --git a/aixplain/modules/agent/tool/python_interpreter_tool.py b/aixplain/modules/agent/tool/python_interpreter_tool.py new file mode 100644 index 00000000..2d1daa30 --- /dev/null +++ b/aixplain/modules/agent/tool/python_interpreter_tool.py @@ -0,0 +1,42 @@ +__author__ = "aiXplain" + +""" +Copyright 2024 The aiXplain SDK authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Author: Lucas Pavanelli and Thiago Castro Ferreira +Date: May 16th 2024 +Description: + Agentification Class +""" + +from aixplain.modules.agent.tool import Tool + + +class PythonInterpreterTool(Tool): + """Python Interpreter Tool""" + + def __init__(self, **additional_info) -> None: + """Python Interpreter Tool""" + super().__init__(name="Python Interpreter", description="", **additional_info) + + def to_dict(self): + return { + "description": "", + "type": "utility", + "utility": "custom_python_code", + } + + def validate(self): + pass diff --git a/pyproject.toml b/pyproject.toml index 1656947a..8792e066 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ namespaces = true [project] name = "aiXplain" -version = "0.2.21" +version = "0.2.25" description = "aiXplain SDK adds AI functions to software." 
readme = "README.md" requires-python = ">=3.5, <4" diff --git a/tests/functional/agent/agent_functional_test.py b/tests/functional/agent/agent_functional_test.py index 3f54d470..214f31b9 100644 --- a/tests/functional/agent/agent_functional_test.py +++ b/tests/functional/agent/agent_functional_test.py @@ -94,6 +94,55 @@ def test_end2end(run_input_map, delete_agents_and_team_agents): agent.delete() +def test_python_interpreter_tool(delete_agents_and_team_agents): + assert delete_agents_and_team_agents + tool = AgentFactory.create_python_interpreter_tool() + assert tool is not None + assert tool.name == "Python Interpreter" + assert tool.description == "" + + agent = AgentFactory.create( + name="Python Developer", + description="A Python developer agent. If you get an error from a tool, try to fix it.", + tools=[tool], + ) + assert agent is not None + response = agent.run("Solve the equation $\\frac{v^2}{2} + 7v - 16 = 0$ to find the value of $v$.") + assert response is not None + assert response["completed"] is True + assert response["status"].lower() == "success" + assert len(response["data"]["intermediate_steps"]) > 0 + intermediate_step = response["data"]["intermediate_steps"][0] + assert len(intermediate_step["tool_steps"]) > 0 + assert intermediate_step["tool_steps"][0]["tool"] == "Custom Code Tool" + agent.delete() + + +def test_custom_code_tool(delete_agents_and_team_agents): + assert delete_agents_and_team_agents + tool = AgentFactory.create_custom_python_code_tool( + name="Add Numbers", + description="Add two numbers", + code='def main(aaa: int, bbb: int) > int:\n """Add two numbers"""\n return aaa + bbb', + ) + assert tool is not None + assert tool.name == "Add Numbers" + assert tool.description == "Add two numbers" + assert tool.code == 'def main(aaa: int, bbb: int) -> int:\n """Add two numbers"""\n return aaa + bbb' + agent = AgentFactory.create( + name="Add Numbers Agent", + description="Add two numbers. Do not directly answer. Use the tool to add the numbers.", + tools=[tool], + ) + assert agent is not None + response = agent.run("How much is 12342 + 112312? 
Do not directly answer the question, call the tool.") + assert response is not None + assert response["completed"] is True + assert response["status"].lower() == "success" + assert "124654" in response["data"]["output"] + agent.delete() + + def test_list_agents(): agents = AgentFactory.list() assert "results" in agents @@ -173,7 +222,7 @@ def test_update_tools_of_agent(run_input_map, delete_agents_and_team_agents): ) assert agent is not None assert agent.status == AssetStatus.DRAFT - assert len(agent.tools) == 0 + assert len(agent.tools) == 0 tools = [] if "model_tools" in run_input_map: @@ -196,13 +245,13 @@ def test_update_tools_of_agent(run_input_map, delete_agents_and_team_agents): agent.update() agent = AgentFactory.get(agent.id) - assert len(agent.tools) == len(tools) + assert len(agent.tools) == len(tools) removed_tool = agent.tools.pop() agent.update() agent = AgentFactory.get(agent.id) - assert len(agent.tools) == len(tools) - 1 - assert removed_tool not in agent.tools + assert len(agent.tools) == len(tools) - 1 + assert removed_tool not in agent.tools agent.delete() diff --git a/tests/unit/agent_test.py b/tests/unit/agent_test.py index cf217919..6c17a5b6 100644 --- a/tests/unit/agent_test.py +++ b/tests/unit/agent_test.py @@ -5,14 +5,14 @@ from aixplain.modules.agent import OutputFormat from aixplain.utils import config from aixplain.factories import AgentFactory -from aixplain.modules.agent import PipelineTool, ModelTool +from aixplain.modules.agent import PipelineTool, ModelTool, PythonInterpreterTool, CustomPythonCodeTool from aixplain.modules.agent.utils import process_variables from urllib.parse import urljoin +from unittest.mock import patch import warnings from aixplain.enums.function import Function - def test_fail_no_data_query(): agent = Agent("123", "Test Agent", "Sample Description") with pytest.raises(Exception) as exc_info: @@ -105,58 +105,89 @@ def test_create_agent(): from aixplain.enums import Supplier with requests_mock.Mocker() as mock: - url = urljoin(config.BACKEND_URL, "sdk/agents") - headers = {"x-api-key": config.TEAM_API_KEY, "Content-Type": "application/json"} - - ref_response = { - "id": "123", - "name": "Test Agent", - "description": "Test Agent Description", - "teamId": "123", - "version": "1.0", - "status": "draft", - "llmId": "6646261c6eb563165658bbb1", - "pricing": {"currency": "USD", "value": 0.0}, - "assets": [ - { - "type": "model", - "supplier": "openai", - "version": "1.0", - "assetId": "6646261c6eb563165658bbb1", - "function": "text-generation", - "description": "Test Tool", - } - ], - } - mock.post(url, headers=headers, json=ref_response) - - url = urljoin(config.BACKEND_URL, "sdk/models/6646261c6eb563165658bbb1") - model_ref_response = { - "id": "6646261c6eb563165658bbb1", - "name": "Test LLM", - "description": "Test LLM Description", - "function": {"id": "text-generation"}, - "supplier": "openai", - "version": {"id": "1.0"}, - "status": "onboarded", - "pricing": {"currency": "USD", "value": 0.0}, - } - mock.get(url, headers=headers, json=model_ref_response) - - agent = AgentFactory.create( - name="Test Agent", - description="Test Agent Description", - llm_id="6646261c6eb563165658bbb1", - tools=[ - AgentFactory.create_model_tool(supplier=Supplier.OPENAI, function="text-generation", description="Test Tool") - ], - ) + with patch( + "aixplain.modules.model.utils.parse_code", + return_value=( + "utility_model_test", + [], + "utility_model_test", + ), + ): + url = urljoin(config.BACKEND_URL, "sdk/agents") + headers = {"x-api-key": 
config.TEAM_API_KEY, "Content-Type": "application/json"} + + ref_response = { + "id": "123", + "name": "Test Agent", + "description": "Test Agent Description", + "teamId": "123", + "version": "1.0", + "status": "draft", + "llmId": "6646261c6eb563165658bbb1", + "pricing": {"currency": "USD", "value": 0.0}, + "assets": [ + { + "type": "model", + "supplier": "openai", + "version": "1.0", + "assetId": "6646261c6eb563165658bbb1", + "function": "text-generation", + "description": "Test Tool", + }, + { + "type": "utility", + "utility": "custom_python_code", + "description": "", + }, + { + "type": "utility", + "utility": "custom_python_code", + "utilityCode": "def main(query: str) -> str:\n return 'Hello, how are you?'", + "description": "Test Tool", + }, + ], + } + mock.post(url, headers=headers, json=ref_response) + + url = urljoin(config.BACKEND_URL, "sdk/models/6646261c6eb563165658bbb1") + model_ref_response = { + "id": "6646261c6eb563165658bbb1", + "name": "Test LLM", + "description": "Test LLM Description", + "function": {"id": "text-generation"}, + "supplier": "openai", + "version": {"id": "1.0"}, + "status": "onboarded", + "pricing": {"currency": "USD", "value": 0.0}, + } + mock.get(url, headers=headers, json=model_ref_response) + + agent = AgentFactory.create( + name="Test Agent", + description="Test Agent Description", + llm_id="6646261c6eb563165658bbb1", + tools=[ + AgentFactory.create_model_tool( + supplier=Supplier.OPENAI, function="text-generation", description="Test Tool" + ), + AgentFactory.create_custom_python_code_tool( + code="def main(query: str) -> str:\n return 'Hello, how are you?'", description="Test Tool" + ), + AgentFactory.create_python_interpreter_tool(), + ], + ) assert agent.name == ref_response["name"] assert agent.description == ref_response["description"] assert agent.llm_id == ref_response["llmId"] assert agent.tools[0].function.value == ref_response["assets"][0]["function"] assert agent.tools[0].description == ref_response["assets"][0]["description"] + assert isinstance(agent.tools[0], ModelTool) + assert agent.tools[1].description == ref_response["assets"][1]["description"] + assert isinstance(agent.tools[1], PythonInterpreterTool) + assert agent.tools[2].description == ref_response["assets"][2]["description"] + assert agent.tools[2].code == ref_response["assets"][2]["utilityCode"] + assert isinstance(agent.tools[2], CustomPythonCodeTool) assert agent.status == AssetStatus.DRAFT From 7628004059431606702aedb77c17194932d35ac5 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Tue, 24 Dec 2024 11:06:54 -0300 Subject: [PATCH 093/105] ENG-1230: set aixplain as default supplier (#348) * set aixplain as default supplier * Fixing supplier unit test --- aixplain/modules/asset.py | 4 ++-- tests/unit/model_test.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/aixplain/modules/asset.py b/aixplain/modules/asset.py index c453415d..b4323cf5 100644 --- a/aixplain/modules/asset.py +++ b/aixplain/modules/asset.py @@ -32,7 +32,7 @@ def __init__( id: Text, name: Text, description: Text, - supplier: Union[Dict, Text, Supplier, int] = "aiXplain", + supplier: Union[Dict, Text, Supplier, int] = Supplier.AIXPLAIN, version: Text = "1.0", license: Optional[License] = None, privacy: Privacy = Privacy.PRIVATE, @@ -65,7 +65,7 @@ def __init__( if self.supplier is None: self.supplier = supplier except Exception: - self.supplier = str(supplier) + self.supplier = Supplier.AIXPLAIN self.version = version 
         self.license = license
         self.privacy = privacy
diff --git a/tests/unit/model_test.py b/tests/unit/model_test.py
index 68da9ef6..1426b7d9 100644
--- a/tests/unit/model_test.py
+++ b/tests/unit/model_test.py
@@ -368,7 +368,7 @@ def test_model_to_dict():
 def test_model_repr():
     # Test with supplier as dict
     model1 = Model(id="test-id", name="Test Model", supplier={"name": "Test Supplier"})
-    assert repr(model1) == ""
+    assert repr(model1) == ""

     # Test with supplier as string
     model2 = Model(id="test-id", name="Test Model", supplier="Test Supplier")

From 5ddb867364ca050b9ea3fa680e8af0f9118738cb Mon Sep 17 00:00:00 2001
From: Zaina Abu Shaban
Date: Thu, 26 Dec 2024 20:41:54 +0300
Subject: [PATCH 094/105] ENG-1069: Initial updates for airv2 model (#330)

* Initial updates for airv2 model
* added create function, renamed ingest to add, removed test file
* added count and update methods
* Refactoring Index code
* Fixed add missing params
* index doc and unit tests
* index doc and unit tests
* added delete
* Changes in aIR v2
* Functional test for aIR v2

---------

Co-authored-by: Thiago Castro Ferreira

---
 aixplain/enums/response_status.py         |  3 +
 aixplain/factories/__init__.py            |  1 +
 aixplain/factories/index_factory.py       | 23 +++++++
 aixplain/factories/model_factory/utils.py |  3 +
 aixplain/modules/__init__.py              |  1 +
 aixplain/modules/model/document_index.py  | 20 ++++++
 aixplain/modules/model/index_model.py     | 84 +++++++++++++++++++++++
 tests/functional/model/run_model_test.py  | 12 ++++
 tests/unit/index_model_test.py            | 78 +++++++++++++++++++++
 9 files changed, 225 insertions(+)
 create mode 100644 aixplain/factories/index_factory.py
 create mode 100644 aixplain/modules/model/document_index.py
 create mode 100644 aixplain/modules/model/index_model.py
 create mode 100644 tests/unit/index_model_test.py

diff --git a/aixplain/enums/response_status.py b/aixplain/enums/response_status.py
index d2810753..257b9427 100644
--- a/aixplain/enums/response_status.py
+++ b/aixplain/enums/response_status.py
@@ -29,3 +29,6 @@ class ResponseStatus(Text, Enum):
     IN_PROGRESS = "IN_PROGRESS"
     SUCCESS = "SUCCESS"
     FAILED = "FAILED"
+
+    def __str__(self):
+        return self.value
diff --git a/aixplain/factories/__init__.py b/aixplain/factories/__init__.py
index 104215a1..f663a4eb 100644
--- a/aixplain/factories/__init__.py
+++ b/aixplain/factories/__init__.py
@@ -33,3 +33,4 @@
 from .finetune_factory import FinetuneFactory
 from .wallet_factory import WalletFactory
 from .api_key_factory import APIKeyFactory
+from .index_factory import IndexFactory
diff --git a/aixplain/factories/index_factory.py b/aixplain/factories/index_factory.py
new file mode 100644
index 00000000..80dc6f07
--- /dev/null
+++ b/aixplain/factories/index_factory.py
@@ -0,0 +1,23 @@
+from aixplain.modules.model.index_model import IndexModel
+from aixplain.factories import ModelFactory
+from aixplain.enums import ResponseStatus
+from typing import Text
+
+
+class IndexFactory(ModelFactory):
+    @classmethod
+    def create(cls, name: Text, description: Text) -> IndexModel:
+        """Create a new index collection"""
+        model = cls.get("66eae6656eb56311f2595011")
+
+        data = {"data": name, "description": description}
+        response = model.run(data=data)
+        if response.status == ResponseStatus.SUCCESS:
+            model_id = response.data
+            model = cls.get(model_id)
+            return model
+
+        error_message = f"Index Factory Exception: {response.error_message}"
+        if response.error_message == "":
+            error_message = "Index Factory Exception: An error occurred while creating the index collection."
+ raise Exception(error_message) diff --git a/aixplain/factories/model_factory/utils.py b/aixplain/factories/model_factory/utils.py index daa1f0db..9fbf8f41 100644 --- a/aixplain/factories/model_factory/utils.py +++ b/aixplain/factories/model_factory/utils.py @@ -2,6 +2,7 @@ import logging from aixplain.modules.model import Model from aixplain.modules.model.llm_model import LLM +from aixplain.modules.model.index_model import IndexModel from aixplain.modules.model.utility_model import UtilityModel, UtilityModelInput from aixplain.enums import DataType, Function, Language, OwnershipType, Supplier, SortBy, SortOrder from aixplain.utils import config @@ -47,6 +48,8 @@ def create_model_from_response(response: Dict) -> Model: f = [p for p in response.get("params", []) if p["name"] == "temperature"] if len(f) > 0 and len(f[0].get("defaultValues", [])) > 0: temperature = float(f[0]["defaultValues"][0]["value"]) + elif function == Function.SEARCH: + ModelClass = IndexModel elif function == Function.UTILITIES: ModelClass = UtilityModel inputs = [ diff --git a/aixplain/modules/__init__.py b/aixplain/modules/__init__.py index 4432e1ad..f8a64650 100644 --- a/aixplain/modules/__init__.py +++ b/aixplain/modules/__init__.py @@ -37,3 +37,4 @@ from .agent.tool import Tool from .team_agent import TeamAgent from .api_key import APIKey, APIKeyLimits, APIKeyUsageLimit +from .model.index_model import IndexModel diff --git a/aixplain/modules/model/document_index.py b/aixplain/modules/model/document_index.py new file mode 100644 index 00000000..12562931 --- /dev/null +++ b/aixplain/modules/model/document_index.py @@ -0,0 +1,20 @@ +from typing import Optional +from uuid import uuid4 + + +class DocumentIndex: + def __init__(self, value: str, value_type: str = "text", id: Optional[str] = None, uri: str = "", attributes: dict = {}): + self.value = value + self.value_type = value_type + self.id = id if id is not None else str(uuid4()) + self.uri = uri + self.attributes = attributes + + def to_dict(self): + return { + "value": self.value, + "value_type": self.value_type, + "id": self.id, + "uri": self.uri, + "attributes": self.attributes, + } diff --git a/aixplain/modules/model/index_model.py b/aixplain/modules/model/index_model.py new file mode 100644 index 00000000..67b3f8f7 --- /dev/null +++ b/aixplain/modules/model/index_model.py @@ -0,0 +1,84 @@ +from aixplain.enums import Function, Supplier, ResponseStatus +from aixplain.modules.model import Model +from aixplain.utils import config +from aixplain.modules.model.response import ModelResponse +from typing import Text, Optional, Union, Dict +from aixplain.modules.model.document_index import DocumentIndex +from typing import List + + +class IndexModel(Model): + def __init__( + self, + id: Text, + name: Text, + description: Text = "", + api_key: Optional[Text] = None, + supplier: Union[Dict, Text, Supplier, int] = "aiXplain", + version: Optional[Text] = None, + function: Optional[Function] = None, + is_subscribed: bool = False, + cost: Optional[Dict] = None, + **additional_info, + ) -> None: + """Index Init + + Args: + id (Text): ID of the Model + name (Text): Name of the Model + description (Text, optional): description of the model. Defaults to "". + api_key (Text, optional): API key of the Model. Defaults to None. + supplier (Union[Dict, Text, Supplier, int], optional): supplier of the asset. Defaults to "aiXplain". + version (Text, optional): version of the model. Defaults to "1.0". + function (Function, optional): model AI function. Defaults to None. 
+ is_subscribed (bool, optional): Is the user subscribed. Defaults to False. + cost (Dict, optional): model price. Defaults to None. + **additional_info: Any additional Model info to be saved + """ + assert function == Function.SEARCH, "Index only supports search function" + super().__init__( + id=id, + name=name, + description=description, + supplier=supplier, + version=version, + cost=cost, + function=function, + is_subscribed=is_subscribed, + api_key=api_key, + **additional_info, + ) + self.url = config.MODELS_RUN_URL + self.backend_url = config.BACKEND_URL + + def search(self, query: str, top_k: int = 10) -> ModelResponse: + data = {"action": "search", "data": query, "payload": {"filters": {}, "top_k": top_k}} + return self.run(data=data) + + def add(self, documents: List[DocumentIndex]) -> ModelResponse: + payloads = [doc.to_dict() for doc in documents] + data = {"action": "ingest", "data": "", "payload": {"payloads": payloads}} + response = self.run(data=data) + if response.status == ResponseStatus.SUCCESS: + response.data = payloads + return response + raise Exception(f"Failed to add documents: {response.error_message}") + + def update(self, documents: List[DocumentIndex]) -> ModelResponse: + payloads = [ + {"value": doc.value, "value_type": doc.value_type, "id": str(doc.id), "uri": doc.uri, "attributes": doc.attributes} + for doc in documents + ] + data = {"action": "update", "data": "", "payload": {"payloads": payloads}} + response = self.run(data=data) + if response.status == ResponseStatus.SUCCESS: + response.data = payloads + return response + raise Exception(f"Failed to update documents: {response.error_message}") + + def count(self) -> float: + data = {"action": "count", "data": ""} + response = self.run(data=data) + if response.status == "SUCCESS": + return int(response.data) + raise Exception(f"Failed to count documents: {response.error_message}") diff --git a/tests/functional/model/run_model_test.py b/tests/functional/model/run_model_test.py index dae11dea..04e5da0d 100644 --- a/tests/functional/model/run_model_test.py +++ b/tests/functional/model/run_model_test.py @@ -52,3 +52,15 @@ def test_run_async(): assert response["status"] == "SUCCESS" assert "teste" in response["data"].lower() + + +def test_index_model(): + from aixplain.modules.model.document_index import DocumentIndex + from aixplain.factories import IndexFactory + + index_model = IndexFactory.create("test", "test") + index_model.add([DocumentIndex(value="Hello, world!", value_type="text", uri="", attributes={})]) + response = index_model.search("Hello") + assert str(response.status) == "SUCCESS" + assert index_model.count() == 1 + index_model.delete() diff --git a/tests/unit/index_model_test.py b/tests/unit/index_model_test.py new file mode 100644 index 00000000..be9acc6f --- /dev/null +++ b/tests/unit/index_model_test.py @@ -0,0 +1,78 @@ +import requests_mock +from aixplain.enums import Function, ResponseStatus +from aixplain.modules.model.document_index import DocumentIndex +from aixplain.modules.model.response import ModelResponse +from aixplain.modules.model.index_model import IndexModel +from aixplain.utils import config +import logging + + +data = {"data": "Model Index", "description": "This is a dummy collection for testing."} +index_id = "id" +execute_url = f"{config.MODELS_RUN_URL}/{index_id}".replace("/api/v1/execute", "/api/v2/execute") + + +def test_search_success(): + mock_response = {"status": "SUCCESS"} + + with requests_mock.Mocker() as mock: + mock.post(execute_url, json=mock_response, 
status_code=200) + index_model = IndexModel(id=index_id, data=data, name="name", function=Function.SEARCH) + response = index_model.search("test query") + + assert isinstance(response, ModelResponse) + assert response.status == ResponseStatus.SUCCESS + + +def test_add_success(): + mock_response = {"status": "SUCCESS"} + + mock_documents = [ + DocumentIndex(value="Sample document content 1", value_type="text", id=0, uri="", attributes={}), + DocumentIndex(value="Sample document content 2", value_type="text", id=1, uri="", attributes={}), + ] + + with requests_mock.Mocker() as mock: + mock.post(execute_url, json=mock_response, status_code=200) + + index_model = IndexModel(id=index_id, data=data, name="name", function=Function.SEARCH) + + response = index_model.add(mock_documents) + + assert isinstance(response, ModelResponse) + assert response.status == ResponseStatus.SUCCESS + + +def test_update_success(): + mock_response = {"status": "SUCCESS"} + + mock_documents = [ + DocumentIndex(value="Updated document content 1", value_type="text", id=0, uri="", attributes={}), + DocumentIndex(value="Updated document content 2", value_type="text", id=1, uri="", attributes={}), + ] + + with requests_mock.Mocker() as mock: + mock.post(execute_url, json=mock_response, status_code=200) + logging.debug(f"Requesting URL: {execute_url}") + + index_model = IndexModel(id=index_id, data=data, name="name", function=Function.SEARCH) + + response = index_model.update(mock_documents) + + assert isinstance(response, ModelResponse) + assert response.status == ResponseStatus.SUCCESS + + +def test_count_success(): + mock_response = {"status": "SUCCESS", "data": 4} + + with requests_mock.Mocker() as mock: + mock.post(execute_url, json=mock_response, status_code=200) + logging.debug(f"Requesting URL: {execute_url}") + + index_model = IndexModel(id=index_id, data=data, name="name", function=Function.SEARCH) + + response = index_model.count() + + assert isinstance(response, int) + assert response == 4 From 4cb3b743c012c3602c34f7e1455099014b40b369 Mon Sep 17 00:00:00 2001 From: kadirpekel Date: Mon, 30 Dec 2024 16:37:47 +0100 Subject: [PATCH 095/105] ENG-1282: Set default pipeline engine version to 3.0 with fallback to 2.0 in case of failure (#349) --- aixplain/modules/pipeline/asset.py | 144 +++++++++++++++++--- tests/functional/pipelines/fallback_test.py | 15 ++ 2 files changed, 138 insertions(+), 21 deletions(-) create mode 100644 tests/functional/pipelines/fallback_test.py diff --git a/aixplain/modules/pipeline/asset.py b/aixplain/modules/pipeline/asset.py index cc2bb8c6..10ee3bf0 100644 --- a/aixplain/modules/pipeline/asset.py +++ b/aixplain/modules/pipeline/asset.py @@ -45,6 +45,9 @@ class Pipeline(Asset): **additional_info: Any additional Pipeline info to be saved """ + VERSION_3_0 = "3.0" + VERSION_2_0 = "2.0" + def __init__( self, id: Text, @@ -100,7 +103,9 @@ def __polling( while not completed and (end - start) < timeout: try: response_body = self.poll(poll_url, name=name) - logging.debug(f"Polling for Pipeline: Status of polling for {name} : {response_body}") + logging.debug( + f"Polling for Pipeline: Status of polling for {name} : {response_body}" + ) completed = response_body["completed"] end = time.time() @@ -112,9 +117,13 @@ def __polling( logging.error(f"Polling for Pipeline: polling for {name} : Continue") if response_body and response_body["status"] == "SUCCESS": try: - logging.debug(f"Polling for Pipeline: Final status of polling for {name} : SUCCESS - {response_body}") + logging.debug( + f"Polling for 
Pipeline: Final status of polling for {name} : SUCCESS - {response_body}" + ) except Exception: - logging.error(f"Polling for Pipeline: Final status of polling for {name} : ERROR - {response_body}") + logging.error( + f"Polling for Pipeline: Final status of polling for {name} : ERROR - {response_body}" + ) else: logging.error( f"Polling for Pipeline: Final status of polling for {name} : No response in {timeout} seconds - {response_body}" @@ -144,11 +153,44 @@ def poll(self, poll_url: Text, name: Text = "pipeline_process") -> Dict: resp["data"] = json.loads(resp["data"])["response"] except Exception: resp = r.json() - logging.info(f"Single Poll for Pipeline: Status of polling for {name} : {resp}") + logging.info( + f"Single Poll for Pipeline: Status of polling for {name} : {resp}" + ) except Exception: resp = {"status": "FAILED"} return resp + def _should_fallback_to_v2(self, response: Dict, version: str) -> bool: + """Determine if the pipeline should fallback to version 2.0 based on the response. + + Args: + response (Dict): The response from the pipeline call. + version (str): The version of the pipeline being used. + + Returns: + bool: True if fallback is needed, False otherwise. + """ + # If the version is not 3.0, no fallback is needed + if version != self.VERSION_3_0: + return False + + should_fallback = False + if "status" not in response or response["status"] == "FAILED": + should_fallback = True + elif response["status"] == "SUCCESS" and ( + "data" not in response or not response["data"] + ): + should_fallback = True + # Check for conditions that require a fallback + + if should_fallback: + logging.warning( + f"Pipeline Run Error: Failed to run pipeline {self.id} with version {version}. " + f"Trying with version {self.VERSION_2_0}." + ) + + return should_fallback + def run( self, data: Union[Text, Dict], @@ -157,6 +199,7 @@ def run( timeout: float = 20000.0, wait_time: float = 1.0, batch_mode: bool = True, + version: str = None, **kwargs, ) -> Dict: """Runs a pipeline call. 
@@ -173,16 +216,39 @@ def run( Returns: Dict: parsed output from pipeline """ + version = version or self.VERSION_3_0 start = time.time() try: - response = self.run_async(data, data_asset=data_asset, name=name, batch_mode=batch_mode, **kwargs) + response = self.run_async( + data, + data_asset=data_asset, + name=name, + batch_mode=batch_mode, + version=version, + **kwargs, + ) + if response["status"] == "FAILED": end = time.time() response["elapsed_time"] = end - start return response + poll_url = response["url"] end = time.time() - response = self.__polling(poll_url, name=name, timeout=timeout, wait_time=wait_time) + response = self.__polling( + poll_url, name=name, timeout=timeout, wait_time=wait_time + ) + + if self._should_fallback_to_v2(response, version): + return self.run( + data, + data_asset=data_asset, + name=name, + batch_mode=batch_mode, + version=self.VERSION_2_0, + **kwargs, + ) + response["version"] = version return response except Exception as e: error_message = f"Error in request for {name}: {str(e)}" @@ -193,6 +259,7 @@ def run( "status": "FAILED", "error": error_message, "elapsed_time": end - start, + "version": version, } def __prepare_payload( @@ -232,7 +299,10 @@ def __prepare_payload( try: payload = json.loads(data) if isinstance(payload, dict) is False: - if isinstance(payload, int) is True or isinstance(payload, float) is True: + if ( + isinstance(payload, int) is True + or isinstance(payload, float) is True + ): payload = str(payload) payload = {"data": payload} except Exception: @@ -270,7 +340,9 @@ def __prepare_payload( asset_payload["dataAsset"]["dataset_id"] = dasset.id source_data_list = [ - dfield for dfield in dasset.source_data if dasset.source_data[dfield].id == data[node_label] + dfield + for dfield in dasset.source_data + if dasset.source_data[dfield].id == data[node_label] ] if len(source_data_list) > 0: @@ -309,6 +381,7 @@ def run_async( data_asset: Optional[Union[Text, Dict]] = None, name: Text = "pipeline_process", batch_mode: bool = True, + version: str = None, **kwargs, ) -> Dict: """Runs asynchronously a pipeline call. @@ -323,6 +396,7 @@ def run_async( Returns: Dict: polling URL in response """ + version = version or self.VERSION_3_0 headers = { "x-api-key": self.api_key, "Content-Type": "application/json", @@ -330,6 +404,7 @@ def run_async( payload = self.__prepare_payload(data=data, data_asset=data_asset) payload["batchmode"] = batch_mode + payload["version"] = version payload.update(kwargs) payload = json.dumps(payload) call_url = f"{self.url}/{self.id}" @@ -340,7 +415,9 @@ def run_async( try: if 200 <= r.status_code < 300: resp = r.json() - logging.info(f"Result of request for {name} - {r.status_code} - {resp}") + logging.info( + f"Result of request for {name} - {r.status_code} - {resp}" + ) poll_url = resp["url"] response = {"status": "IN_PROGRESS", "url": poll_url} else: @@ -356,15 +433,24 @@ def run_async( error = "Validation-related error: Please ensure all required fields are provided and correctly formatted." else: status_code = str(r.status_code) - error = ( - f"Status {status_code}: Unspecified error: An unspecified error occurred while processing your request." - ) + error = f"Status {status_code}: Unspecified error: An unspecified error occurred while processing your request." 
response = {"status": "FAILED", "error_message": error} logging.error(f"Error in request for {name} - {r.status_code}: {error}") except Exception: response = {"status": "FAILED"} if resp is not None: response["error"] = resp + + if self._should_fallback_to_v2(response, version): + return self.run_async( + data, + data_asset=data_asset, + name=name, + batch_mode=batch_mode, + version=self.VERSION_2_0, + **kwargs, + ) + response["version"] = version return response def update( @@ -386,14 +472,15 @@ def update( """ import warnings import inspect + # Get the current call stack stack = inspect.stack() - if len(stack) > 2 and stack[1].function != 'save': + if len(stack) > 2 and stack[1].function != "save": warnings.warn( "update() is deprecated and will be removed in a future version. " "Please use save() instead.", DeprecationWarning, - stacklevel=2 + stacklevel=2, ) try: if isinstance(pipeline, str) is True: @@ -406,7 +493,9 @@ def update( for i, node in enumerate(pipeline["nodes"]): if "functionType" in node: - pipeline["nodes"][i]["functionType"] = pipeline["nodes"][i]["functionType"].lower() + pipeline["nodes"][i]["functionType"] = pipeline["nodes"][i][ + "functionType" + ].lower() # prepare payload status = "draft" if save_as_asset is True: @@ -424,7 +513,9 @@ def update( "Authorization": f"Token {api_key}", "Content-Type": "application/json", } - logging.info(f"Start service for PUT Update Pipeline - {url} - {headers} - {json.dumps(payload)}") + logging.info( + f"Start service for PUT Update Pipeline - {url} - {headers} - {json.dumps(payload)}" + ) r = _request_with_retry("put", url, headers=headers, json=payload) response = r.json() logging.info(f"Pipeline {response['id']} Updated.") @@ -448,7 +539,12 @@ def delete(self) -> None: logging.error(message) raise Exception(f"{message}") - def save(self, pipeline: Optional[Union[Text, Dict]] = None, save_as_asset: bool = False, api_key: Optional[Text] = None): + def save( + self, + pipeline: Optional[Union[Text, Dict]] = None, + save_as_asset: bool = False, + api_key: Optional[Text] = None, + ): """Update and Save Pipeline Args: @@ -470,11 +566,15 @@ def save(self, pipeline: Optional[Union[Text, Dict]] = None, save_as_asset: bool ), "Pipeline Update Error: Make sure the pipeline to be saved is in a JSON file." 
with open(pipeline) as f: pipeline = json.load(f) - self.update(pipeline=pipeline, save_as_asset=save_as_asset, api_key=api_key) + self.update( + pipeline=pipeline, save_as_asset=save_as_asset, api_key=api_key + ) for i, node in enumerate(pipeline["nodes"]): if "functionType" in node: - pipeline["nodes"][i]["functionType"] = pipeline["nodes"][i]["functionType"].lower() + pipeline["nodes"][i]["functionType"] = pipeline["nodes"][i][ + "functionType" + ].lower() # prepare payload status = "draft" if save_as_asset is True: @@ -491,8 +591,10 @@ def save(self, pipeline: Optional[Union[Text, Dict]] = None, save_as_asset: bool "Authorization": f"Token {api_key}", "Content-Type": "application/json", } - logging.info(f"Start service for Save Pipeline - {url} - {headers} - {json.dumps(payload)}") - r = _request_with_retry("post", url, headers=headers, json=payload) + logging.info( + f"Start service for Save Pipeline - {url} - {headers} - {json.dumps(payload)}" + ) + r = _request_with_retry(method, url, headers=headers, json=payload) response = r.json() self.id = response["id"] logging.info(f"Pipeline {response['id']} Saved.") diff --git a/tests/functional/pipelines/fallback_test.py b/tests/functional/pipelines/fallback_test.py new file mode 100644 index 00000000..4650bff3 --- /dev/null +++ b/tests/functional/pipelines/fallback_test.py @@ -0,0 +1,15 @@ +from aixplain.factories import PipelineFactory + + +def test_fallback_to_v2(): + pipeline = PipelineFactory.get("6750535166d4db27e14f07b1") + response = pipeline.run( + "https://homepage.ntu.edu.tw/~karchung/miniconversations/mc1.mp3" + ) + assert response["version"] == "3.0" + assert response["status"] == "SUCCESS" + + pipeline = PipelineFactory.get("6750535166d4db27e14f07b1") + response = pipeline.run("<>") + assert response["version"] == "2.0" + assert response["status"] == "ERROR" From 889c641ece4fc7e75e08cfdd95599aaae429f780 Mon Sep 17 00:00:00 2001 From: kadirpekel Date: Mon, 30 Dec 2024 16:38:13 +0100 Subject: [PATCH 096/105] ENG-1288: Added diarization test into pipelines functional tests (#351) --- tests/functional/pipelines/run_test.py | 91 +++++++++++++++++++++----- 1 file changed, 73 insertions(+), 18 deletions(-) diff --git a/tests/functional/pipelines/run_test.py b/tests/functional/pipelines/run_test.py index 985e4a91..7a1138bf 100644 --- a/tests/functional/pipelines/run_test.py +++ b/tests/functional/pipelines/run_test.py @@ -51,7 +51,9 @@ def test_get_pipeline(): def test_run_single_str(batchmode: bool, version: str): pipeline = PipelineFactory.list(query="SingleNodePipeline")["results"][0] - response = pipeline.run(data="Translate this thing", batch_mode=batchmode, **{"version": version}) + response = pipeline.run( + data="Translate this thing", batch_mode=batchmode, **{"version": version} + ) assert response["status"] == "SUCCESS" @@ -91,7 +93,7 @@ def test_run_with_url(batchmode: bool, version: str): response = pipeline.run( data="https://aixplain-platform-assets.s3.amazonaws.com/data/dev/64c81163f8bdcac7443c2dad/data/f8.txt", batch_mode=batchmode, - **{"version": version} + **{"version": version}, ) assert response["status"] == "SUCCESS" @@ -111,7 +113,12 @@ def test_run_with_dataset(batchmode: bool, version: str): data_id = dataset.source_data["en"].id pipeline = PipelineFactory.list(query="SingleNodePipeline")["results"][0] - response = pipeline.run(data=data_id, data_asset=data_asset_id, batch_mode=batchmode, **{"version": version}) + response = pipeline.run( + data=data_id, + data_asset=data_asset_id, + batch_mode=batchmode, + 
**{"version": version}, + ) assert response["status"] == "SUCCESS" @@ -130,7 +137,7 @@ def test_run_multipipe_with_strings(batchmode: bool, version: str): response = pipeline.run( data={"Input": "Translate this thing.", "Reference": "Traduza esta coisa."}, batch_mode=batchmode, - **{"version": version} + **{"version": version}, ) assert response["status"] == "SUCCESS" @@ -157,15 +164,20 @@ def test_run_multipipe_with_datasets(batchmode: bool, version: str): data={"Input": input_id, "Reference": reference_id}, data_asset={"Input": data_asset_id, "Reference": data_asset_id}, batch_mode=batchmode, - **{"version": version} + **{"version": version}, ) assert response["status"] == "SUCCESS" @pytest.mark.parametrize("version", ["2.0", "3.0"]) def test_run_segment_reconstruct(version: str): - pipeline = PipelineFactory.list(query="Segmentation/Reconstruction Functional Test - DO NOT DELETE")["results"][0] - response = pipeline.run("https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", **{"version": version}) + pipeline = PipelineFactory.list( + query="Segmentation/Reconstruction Functional Test - DO NOT DELETE" + )["results"][0] + response = pipeline.run( + "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", + **{"version": version}, + ) assert response["status"] == "SUCCESS" output = response["data"][0] @@ -179,11 +191,13 @@ def test_run_translation_metric(version: str): reference_id = dataset.target_data["pt"][0].id - pipeline = PipelineFactory.list(query="Translation Metric Functional Test - DO NOT DELETE")["results"][0] + pipeline = PipelineFactory.list( + query="Translation Metric Functional Test - DO NOT DELETE" + )["results"][0] response = pipeline.run( data={"TextInput": reference_id, "ReferenceInput": reference_id}, data_asset={"TextInput": data_asset_id, "ReferenceInput": data_asset_id}, - **{"version": version} + **{"version": version}, ) assert response["status"] == "SUCCESS" @@ -194,13 +208,15 @@ def test_run_translation_metric(version: str): @pytest.mark.parametrize("version", ["2.0", "3.0"]) def test_run_metric(version: str): - pipeline = PipelineFactory.list(query="ASR Metric Functional Test - DO NOT DELETE")["results"][0] + pipeline = PipelineFactory.list(query="ASR Metric Functional Test - DO NOT DELETE")[ + "results" + ][0] response = pipeline.run( { "AudioInput": "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", "ReferenceInput": "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.txt", }, - **{"version": version} + **{"version": version}, ) assert response["status"] == "SUCCESS" @@ -212,10 +228,26 @@ def test_run_metric(version: str): @pytest.mark.parametrize( "input_data,output_data,version", [ - ("https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", "AudioOutput", "2.0"), - ("https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.txt", "TextOutput", "2.0"), - ("https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", "AudioOutput", "3.0"), - ("https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.txt", "TextOutput", "3.0"), + ( + "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", + "AudioOutput", + "2.0", + ), + ( + "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.txt", + "TextOutput", + "2.0", + ), + ( + "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", + "AudioOutput", + "3.0", + ), + ( + 
"https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.txt", + "TextOutput", + "3.0", + ), ], ) def test_run_router(input_data: str, output_data: str, version: str): @@ -245,8 +277,13 @@ def test_run_decision(input_data: str, output_data: str, version: str): @pytest.mark.parametrize("version", ["3.0"]) def test_run_script(version: str): - pipeline = PipelineFactory.list(query="Script Functional Test - DO NOT DELETE")["results"][0] - response = pipeline.run("https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", **{"version": version}) + pipeline = PipelineFactory.list(query="Script Functional Test - DO NOT DELETE")[ + "results" + ][0] + response = pipeline.run( + "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", + **{"version": version}, + ) assert response["status"] == "SUCCESS" data = response["data"][0]["segments"][0]["response"] @@ -255,7 +292,9 @@ def test_run_script(version: str): @pytest.mark.parametrize("version", ["2.0", "3.0"]) def test_run_text_reconstruction(version: str): - pipeline = PipelineFactory.list(query="Text Reconstruction - DO NOT DELETE")["results"][0] + pipeline = PipelineFactory.list(query="Text Reconstruction - DO NOT DELETE")[ + "results" + ][0] response = pipeline.run("Segment A\nSegment B\nSegment C", **{"version": version}) assert response["status"] == "SUCCESS" @@ -268,3 +307,19 @@ def test_run_text_reconstruction(version: str): for d in response["data"]: assert len(d["segments"]) > 0 assert d["segments"][0]["success"] is True + + +@pytest.mark.parametrize("version", ["3.0"]) +def test_run_diarization(version: str): + pipeline = PipelineFactory.list( + query="Diarization ASR Functional Test - DO NOT DELETE" + )["results"][0] + response = pipeline.run( + "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", + **{"version": version}, + ) + + assert response["status"] == "SUCCESS" + for d in response["data"]: + assert len(d["segments"]) > 0 + assert d["segments"][0]["success"] is True From 5ee6b97f004ed57df7d14b6f2b52bba35081ee51 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Thu, 2 Jan 2025 14:43:05 -0300 Subject: [PATCH 097/105] Set method on pipeline saving (#354) --- aixplain/modules/pipeline/asset.py | 66 +++++++++--------------------- 1 file changed, 19 insertions(+), 47 deletions(-) diff --git a/aixplain/modules/pipeline/asset.py b/aixplain/modules/pipeline/asset.py index 10ee3bf0..b4615ae4 100644 --- a/aixplain/modules/pipeline/asset.py +++ b/aixplain/modules/pipeline/asset.py @@ -103,9 +103,7 @@ def __polling( while not completed and (end - start) < timeout: try: response_body = self.poll(poll_url, name=name) - logging.debug( - f"Polling for Pipeline: Status of polling for {name} : {response_body}" - ) + logging.debug(f"Polling for Pipeline: Status of polling for {name} : {response_body}") completed = response_body["completed"] end = time.time() @@ -117,13 +115,9 @@ def __polling( logging.error(f"Polling for Pipeline: polling for {name} : Continue") if response_body and response_body["status"] == "SUCCESS": try: - logging.debug( - f"Polling for Pipeline: Final status of polling for {name} : SUCCESS - {response_body}" - ) + logging.debug(f"Polling for Pipeline: Final status of polling for {name} : SUCCESS - {response_body}") except Exception: - logging.error( - f"Polling for Pipeline: Final status of polling for {name} : ERROR - {response_body}" - ) + logging.error(f"Polling for Pipeline: Final status of 
polling for {name} : ERROR - {response_body}") else: logging.error( f"Polling for Pipeline: Final status of polling for {name} : No response in {timeout} seconds - {response_body}" @@ -153,9 +147,7 @@ def poll(self, poll_url: Text, name: Text = "pipeline_process") -> Dict: resp["data"] = json.loads(resp["data"])["response"] except Exception: resp = r.json() - logging.info( - f"Single Poll for Pipeline: Status of polling for {name} : {resp}" - ) + logging.info(f"Single Poll for Pipeline: Status of polling for {name} : {resp}") except Exception: resp = {"status": "FAILED"} return resp @@ -177,9 +169,7 @@ def _should_fallback_to_v2(self, response: Dict, version: str) -> bool: should_fallback = False if "status" not in response or response["status"] == "FAILED": should_fallback = True - elif response["status"] == "SUCCESS" and ( - "data" not in response or not response["data"] - ): + elif response["status"] == "SUCCESS" and ("data" not in response or not response["data"]): should_fallback = True # Check for conditions that require a fallback @@ -235,9 +225,7 @@ def run( poll_url = response["url"] end = time.time() - response = self.__polling( - poll_url, name=name, timeout=timeout, wait_time=wait_time - ) + response = self.__polling(poll_url, name=name, timeout=timeout, wait_time=wait_time) if self._should_fallback_to_v2(response, version): return self.run( @@ -299,10 +287,7 @@ def __prepare_payload( try: payload = json.loads(data) if isinstance(payload, dict) is False: - if ( - isinstance(payload, int) is True - or isinstance(payload, float) is True - ): + if isinstance(payload, int) is True or isinstance(payload, float) is True: payload = str(payload) payload = {"data": payload} except Exception: @@ -340,9 +325,7 @@ def __prepare_payload( asset_payload["dataAsset"]["dataset_id"] = dasset.id source_data_list = [ - dfield - for dfield in dasset.source_data - if dasset.source_data[dfield].id == data[node_label] + dfield for dfield in dasset.source_data if dasset.source_data[dfield].id == data[node_label] ] if len(source_data_list) > 0: @@ -415,9 +398,7 @@ def run_async( try: if 200 <= r.status_code < 300: resp = r.json() - logging.info( - f"Result of request for {name} - {r.status_code} - {resp}" - ) + logging.info(f"Result of request for {name} - {r.status_code} - {resp}") poll_url = resp["url"] response = {"status": "IN_PROGRESS", "url": poll_url} else: @@ -433,7 +414,9 @@ def run_async( error = "Validation-related error: Please ensure all required fields are provided and correctly formatted." else: status_code = str(r.status_code) - error = f"Status {status_code}: Unspecified error: An unspecified error occurred while processing your request." + error = ( + f"Status {status_code}: Unspecified error: An unspecified error occurred while processing your request." + ) response = {"status": "FAILED", "error_message": error} logging.error(f"Error in request for {name} - {r.status_code}: {error}") except Exception: @@ -477,8 +460,7 @@ def update( stack = inspect.stack() if len(stack) > 2 and stack[1].function != "save": warnings.warn( - "update() is deprecated and will be removed in a future version. " - "Please use save() instead.", + "update() is deprecated and will be removed in a future version. 
" "Please use save() instead.", DeprecationWarning, stacklevel=2, ) @@ -493,9 +475,7 @@ def update( for i, node in enumerate(pipeline["nodes"]): if "functionType" in node: - pipeline["nodes"][i]["functionType"] = pipeline["nodes"][i][ - "functionType" - ].lower() + pipeline["nodes"][i]["functionType"] = pipeline["nodes"][i]["functionType"].lower() # prepare payload status = "draft" if save_as_asset is True: @@ -513,9 +493,7 @@ def update( "Authorization": f"Token {api_key}", "Content-Type": "application/json", } - logging.info( - f"Start service for PUT Update Pipeline - {url} - {headers} - {json.dumps(payload)}" - ) + logging.info(f"Start service for PUT Update Pipeline - {url} - {headers} - {json.dumps(payload)}") r = _request_with_retry("put", url, headers=headers, json=payload) response = r.json() logging.info(f"Pipeline {response['id']} Updated.") @@ -566,15 +544,11 @@ def save( ), "Pipeline Update Error: Make sure the pipeline to be saved is in a JSON file." with open(pipeline) as f: pipeline = json.load(f) - self.update( - pipeline=pipeline, save_as_asset=save_as_asset, api_key=api_key - ) + self.update(pipeline=pipeline, save_as_asset=save_as_asset, api_key=api_key) for i, node in enumerate(pipeline["nodes"]): if "functionType" in node: - pipeline["nodes"][i]["functionType"] = pipeline["nodes"][i][ - "functionType" - ].lower() + pipeline["nodes"][i]["functionType"] = pipeline["nodes"][i]["functionType"].lower() # prepare payload status = "draft" if save_as_asset is True: @@ -591,10 +565,8 @@ def save( "Authorization": f"Token {api_key}", "Content-Type": "application/json", } - logging.info( - f"Start service for Save Pipeline - {url} - {headers} - {json.dumps(payload)}" - ) - r = _request_with_retry(method, url, headers=headers, json=payload) + logging.info(f"Start service for Save Pipeline - {url} - {headers} - {json.dumps(payload)}") + r = _request_with_retry("post", url, headers=headers, json=payload) response = r.json() self.id = response["id"] logging.info(f"Pipeline {response['id']} Saved.") From 48c774d4bffd6c59cb71989c70838b4e651a2fc9 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Thu, 2 Jan 2025 14:44:14 -0300 Subject: [PATCH 098/105] BUG-275: team deployment (#332) * Correctly setting the status of a team agent * Pipeline deploy method * Test for deploying teams * Adding pipeline.deploy unit test --- aixplain/factories/agent_factory/utils.py | 13 ++++--- aixplain/factories/pipeline_factory/utils.py | 4 +- aixplain/modules/pipeline/asset.py | 18 +++++++++ aixplain/modules/team_agent/__init__.py | 7 ++-- tests/functional/pipelines/create_test.py | 5 +++ .../team_agent/team_agent_functional_test.py | 7 +++- tests/unit/pipeline_test.py | 15 ++++++++ tests/unit/team_agent_test.py | 37 +++++++++++++++---- 8 files changed, 86 insertions(+), 20 deletions(-) diff --git a/aixplain/factories/agent_factory/utils.py b/aixplain/factories/agent_factory/utils.py index 54f746d8..d6857468 100644 --- a/aixplain/factories/agent_factory/utils.py +++ b/aixplain/factories/agent_factory/utils.py @@ -19,12 +19,13 @@ def build_agent(payload: Dict, api_key: Text = config.TEAM_API_KEY) -> Agent: if tool["type"] == "model": supplier = "aixplain" for supplier_ in Supplier: - if tool["supplier"] is not None and tool["supplier"].lower() in [ - supplier_.value["code"].lower(), - supplier_.value["name"].lower(), - ]: - supplier = supplier_ - break + if isinstance(tool["supplier"], str): + if tool["supplier"] is not None and 
tool["supplier"].lower() in [ + supplier_.value["code"].lower(), + supplier_.value["name"].lower(), + ]: + supplier = supplier_ + break tool = ModelTool( function=Function(tool.get("function", None)), diff --git a/aixplain/factories/pipeline_factory/utils.py b/aixplain/factories/pipeline_factory/utils.py index 08954571..5cd25580 100644 --- a/aixplain/factories/pipeline_factory/utils.py +++ b/aixplain/factories/pipeline_factory/utils.py @@ -32,7 +32,9 @@ def build_from_response(response: Dict, load_architecture: bool = False) -> Pipe response["api_key"] = config.TEAM_API_KEY # instantiating pipeline generic info - pipeline = Pipeline(response["id"], response["name"], response["api_key"]) + pipeline = Pipeline( + id=response["id"], name=response["name"], api_key=response["api_key"], status=response.get("status", "draft") + ) if load_architecture is True: try: # instantiating nodes diff --git a/aixplain/modules/pipeline/asset.py b/aixplain/modules/pipeline/asset.py index b4615ae4..4c557267 100644 --- a/aixplain/modules/pipeline/asset.py +++ b/aixplain/modules/pipeline/asset.py @@ -25,6 +25,7 @@ import json import os import logging +from aixplain.enums.asset_status import AssetStatus from aixplain.modules.asset import Asset from aixplain.utils import config from aixplain.utils.file_utils import _request_with_retry @@ -56,6 +57,7 @@ def __init__( url: Text = config.BACKEND_URL, supplier: Text = "aiXplain", version: Text = "1.0", + status: AssetStatus = AssetStatus.DRAFT, **additional_info, ) -> None: """Create a Pipeline with the necessary information @@ -67,6 +69,7 @@ def __init__( url (Text, optional): running URL of platform. Defaults to config.BACKEND_URL. supplier (Text, optional): Pipeline supplier. Defaults to "aiXplain". version (Text, optional): version of the pipeline. Defaults to "1.0". + status (AssetStatus, optional): Pipeline status. Defaults to AssetStatus.DRAFT. **additional_info: Any additional Pipeline info to be saved """ if not name: @@ -75,6 +78,12 @@ def __init__( super().__init__(id, name, "", supplier, version) self.api_key = api_key self.url = f"{url}/assets/pipeline/execution/run" + if isinstance(status, str): + try: + status = AssetStatus(status) + except Exception: + status = AssetStatus.DRAFT + self.status = status self.additional_info = additional_info def __polling( @@ -572,3 +581,12 @@ def save( logging.info(f"Pipeline {response['id']} Saved.") except Exception as e: raise Exception(e) + + def deploy(self, api_key: Optional[Text] = None) -> None: + """Deploy the Pipeline.""" + assert self.status == "draft", "Pipeline Deployment Error: Pipeline must be in draft status." + assert self.status != "onboarded", "Pipeline Deployment Error: Pipeline must be onboarded." + + pipeline = self.to_dict() + self.update(pipeline=pipeline, save_as_asset=True, api_key=api_key, name=self.name) + self.status = AssetStatus.ONBOARDED diff --git a/aixplain/modules/team_agent/__init__.py b/aixplain/modules/team_agent/__init__.py index 80729d80..b7094348 100644 --- a/aixplain/modules/team_agent/__init__.py +++ b/aixplain/modules/team_agent/__init__.py @@ -70,7 +70,7 @@ def __init__( version: Optional[Text] = None, cost: Optional[Dict] = None, use_mentalist_and_inspector: bool = True, - status: AssetStatus = AssetStatus.ONBOARDING, + status: AssetStatus = AssetStatus.DRAFT, **additional_info, ) -> None: """Create a FineTune with the necessary information. 
@@ -97,7 +97,7 @@ def __init__( try: status = AssetStatus(status) except Exception: - status = AssetStatus.ONBOARDING + status = AssetStatus.DRAFT self.status = status def run( @@ -286,8 +286,9 @@ def to_dict(self) -> Dict: "llmId": self.llm_id, "supervisorId": self.llm_id, "plannerId": self.llm_id if self.use_mentalist_and_inspector else None, - "supplier": self.supplier, + "supplier": self.supplier.value["code"] if isinstance(self.supplier, Supplier) else self.supplier, "version": self.version, + "status": self.status.value, } def validate(self) -> None: diff --git a/tests/functional/pipelines/create_test.py b/tests/functional/pipelines/create_test.py index 6cf3d718..2cad384a 100644 --- a/tests/functional/pipelines/create_test.py +++ b/tests/functional/pipelines/create_test.py @@ -43,6 +43,11 @@ def test_create_pipeline_from_string(): assert isinstance(pipeline, Pipeline) assert pipeline.id != "" + assert pipeline.status.value == "draft" + + pipeline.deploy() + pipeline = PipelineFactory.get(pipeline.id) + assert pipeline.status.value == "onboarded" pipeline.delete() diff --git a/tests/functional/team_agent/team_agent_functional_test.py b/tests/functional/team_agent/team_agent_functional_test.py index e60e453a..a402f324 100644 --- a/tests/functional/team_agent/team_agent_functional_test.py +++ b/tests/functional/team_agent/team_agent_functional_test.py @@ -33,6 +33,7 @@ def read_data(data_path): return json.load(open(data_path, "r")) + @pytest.fixture(scope="function") def delete_agents_and_team_agents(): for team_agent in TeamAgentFactory.list()["results"]: @@ -94,6 +95,7 @@ def test_end2end(run_input_map, delete_agents_and_team_agents): team_agent.deploy() team_agent = TeamAgentFactory.get(team_agent.id) assert team_agent is not None + assert team_agent.status == AssetStatus.ONBOARDED response = team_agent.run(data=run_input_map["query"]) assert response is not None @@ -161,6 +163,7 @@ def test_fail_non_existent_llm(): ) assert str(exc_info.value) == "Large Language Model with ID 'non_existent_llm' not found." + def test_add_remove_agents_from_team_agent(run_input_map, delete_agents_and_team_agents): assert delete_agents_and_team_agents @@ -210,12 +213,12 @@ def test_add_remove_agents_from_team_agent(run_input_map, delete_agents_and_team assert new_agent.id in [agent.id for agent in team_agent.agents] assert len(team_agent.agents) == len(agents) + 1 - removed_agent = team_agent.agents.pop(0) + removed_agent = team_agent.agents.pop(0) team_agent.update() team_agent = TeamAgentFactory.get(team_agent.id) assert removed_agent.id not in [agent.id for agent in team_agent.agents] - assert len(team_agent.agents) == len(agents) + assert len(team_agent.agents) == len(agents) team_agent.delete() new_agent.delete() diff --git a/tests/unit/pipeline_test.py b/tests/unit/pipeline_test.py index d1b0f9b2..913fe295 100644 --- a/tests/unit/pipeline_test.py +++ b/tests/unit/pipeline_test.py @@ -96,3 +96,18 @@ def test_get_pipeline_error_response(): PipelineFactory.get(pipeline_id=pipeline_id) assert "Pipeline GET Error: Failed to retrieve pipeline test-pipeline-id. 
Status Code: 404" in str(excinfo.value) + + +def test_deploy_pipeline(): + with requests_mock.Mocker() as mock: + pipeline_id = "test-pipeline-id" + url = urljoin(config.BACKEND_URL, f"sdk/pipelines/{pipeline_id}") + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + + mock.put(url, headers=headers, json={"status": "SUCCESS", "id": pipeline_id}) + + pipeline = Pipeline(id=pipeline_id, api_key=config.TEAM_API_KEY, name="Test Pipeline", url=config.BACKEND_URL) + pipeline.deploy() + + assert pipeline.id == pipeline_id + assert pipeline.status.value == "onboarded" diff --git a/tests/unit/team_agent_test.py b/tests/unit/team_agent_test.py index 56564b73..e6901cec 100644 --- a/tests/unit/team_agent_test.py +++ b/tests/unit/team_agent_test.py @@ -186,11 +186,32 @@ def test_create_team_agent(): llm_id="6646261c6eb563165658bbb1", agents=[agent], ) - assert team_agent.id is not None - assert team_agent.name == team_ref_response["name"] - assert team_agent.description == team_ref_response["description"] - assert team_agent.llm_id == team_ref_response["llmId"] - assert team_agent.use_mentalist_and_inspector is True - assert team_agent.status == AssetStatus.DRAFT - assert len(team_agent.agents) == 1 - assert team_agent.agents[0].id == team_ref_response["agents"][0]["assetId"] + assert team_agent.id is not None + assert team_agent.name == team_ref_response["name"] + assert team_agent.description == team_ref_response["description"] + assert team_agent.llm_id == team_ref_response["llmId"] + assert team_agent.use_mentalist_and_inspector is True + assert team_agent.status == AssetStatus.DRAFT + assert len(team_agent.agents) == 1 + assert team_agent.agents[0].id == team_ref_response["agents"][0]["assetId"] + + url = urljoin(config.BACKEND_URL, f"sdk/agent-communities/{team_agent.id}") + team_ref_response = { + "id": "team_agent_123", + "name": "TEST Multi agent", + "status": "onboarded", + "teamId": 645, + "description": "TEST Multi agent", + "llmId": "6646261c6eb563165658bbb1", + "assets": [], + "agents": [{"assetId": "123", "type": "AGENT", "number": 0, "label": "AGENT"}], + "links": [], + "plannerId": "6646261c6eb563165658bbb1", + "supervisorId": "6646261c6eb563165658bbb1", + "createdAt": "2024-10-28T19:30:25.344Z", + "updatedAt": "2024-10-28T19:30:25.344Z", + } + mock.put(url, headers=headers, json=team_ref_response) + + team_agent.deploy() + assert team_agent.status.value == "onboarded" From 220dddab855f57901f2f80d87240ebedc50b1e50 Mon Sep 17 00:00:00 2001 From: kadirpekel Date: Thu, 2 Jan 2025 21:28:52 +0100 Subject: [PATCH 099/105] ENG-1289: Enabled reconstructor and segmentor designer nodes when pipeline fetched (#352) Co-authored-by: Thiago Castro Ferreira --- aixplain/factories/pipeline_factory/utils.py | 57 ++++++++++++++----- .../modules/pipeline/designer/__init__.py | 2 + aixplain/modules/pipeline/designer/nodes.py | 12 +--- 3 files changed, 48 insertions(+), 23 deletions(-) diff --git a/aixplain/factories/pipeline_factory/utils.py b/aixplain/factories/pipeline_factory/utils.py index 5cd25580..2a7de16b 100644 --- a/aixplain/factories/pipeline_factory/utils.py +++ b/aixplain/factories/pipeline_factory/utils.py @@ -14,6 +14,8 @@ Route, Script, Link, + BareSegmentor, + BareReconstructor, ) from typing import Dict @@ -47,28 +49,45 @@ def build_from_response(response: Dict, load_architecture: bool = False) -> Pipe elif node_json["type"].lower() == "asset": if node_json["functionType"] == "metric": node = BareMetric(asset_id=node_json["assetId"]) + elif 
node_json["functionType"] == "reconstructor": + node = BareReconstructor(asset_id=node_json["assetId"]) + elif node_json["functionType"] == "segmentor": + node = BareSegmentor(asset_id=node_json["assetId"]) else: node = BareAsset(asset_id=node_json["assetId"]) - elif node_json["type"].lower() == "segmentor": - raise NotImplementedError() - elif node_json["type"].lower() == "reconstructor": - raise NotImplementedError() elif node_json["type"].lower() == "decision": - node = Decision(routes=[Route(**route) for route in node_json["routes"]]) + node = Decision( + routes=[Route(**route) for route in node_json["routes"]] + ) elif node_json["type"].lower() == "router": - node = Router(routes=[Route(**route) for route in node_json["routes"]]) + node = Router( + routes=[Route(**route) for route in node_json["routes"]] + ) elif node_json["type"].lower() == "script": - node = Script(fileId=node_json["fileId"], fileMetadata=node_json["fileMetadata"]) + node = Script( + fileId=node_json["fileId"], + fileMetadata=node_json["fileMetadata"], + ) elif node_json["type"].lower() == "output": node = Output() if "inputValues" in node_json: [ node.inputs.create_param( - data_type=DataType(input_param["dataType"]) if "dataType" in input_param else None, + data_type=( + DataType(input_param["dataType"]) + if "dataType" in input_param + else None + ), code=input_param["code"], - value=input_param["value"] if "value" in input_param else None, - is_required=input_param["isRequired"] if "isRequired" in input_param else False, + value=( + input_param["value"] if "value" in input_param else None + ), + is_required=( + input_param["isRequired"] + if "isRequired" in input_param + else False + ), ) for input_param in node_json["inputValues"] if input_param["code"] not in node.inputs @@ -76,10 +95,22 @@ def build_from_response(response: Dict, load_architecture: bool = False) -> Pipe if "outputValues" in node_json: [ node.outputs.create_param( - data_type=DataType(output_param["dataType"]) if "dataType" in output_param else None, + data_type=( + DataType(output_param["dataType"]) + if "dataType" in output_param + else None + ), code=output_param["code"], - value=output_param["value"] if "value" in output_param else None, - is_required=output_param["isRequired"] if "isRequired" in output_param else False, + value=( + output_param["value"] + if "value" in output_param + else None + ), + is_required=( + output_param["isRequired"] + if "isRequired" in output_param + else False + ), ) for output_param in node_json["outputValues"] if output_param["code"] not in node.outputs diff --git a/aixplain/modules/pipeline/designer/__init__.py b/aixplain/modules/pipeline/designer/__init__.py index 6a493aa4..7d880167 100644 --- a/aixplain/modules/pipeline/designer/__init__.py +++ b/aixplain/modules/pipeline/designer/__init__.py @@ -11,6 +11,8 @@ BaseMetric, BareAsset, BareMetric, + BareSegmentor, + BareReconstructor, ) from .pipeline import DesignerPipeline from .base import ( diff --git a/aixplain/modules/pipeline/designer/nodes.py b/aixplain/modules/pipeline/designer/nodes.py index 7e6e1803..fbe27991 100644 --- a/aixplain/modules/pipeline/designer/nodes.py +++ b/aixplain/modules/pipeline/designer/nodes.py @@ -474,19 +474,11 @@ class BaseReconstructor(AssetNode[TI, TO]): class ReconstructorInputs(Inputs): - data: InputParam = None - - def __init__(self, node: Node): - super().__init__(node) - self.data = self.create_param("data") + pass class ReconstructorOutputs(Outputs): - data: OutputParam = None - - def __init__(self, node: Node): - 
super().__init__(node) - self.data = self.create_param("data") + pass class BareReconstructor(BaseReconstructor[ReconstructorInputs, ReconstructorOutputs]): From 1b05a0c05bf6f3099f3fdfcca7c545115dda2fd7 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Fri, 3 Jan 2025 17:28:50 -0300 Subject: [PATCH 100/105] ENG-1282 revert (#355) * Removing pipeline versioning fallback * Reverting change in unit test --- aixplain/modules/pipeline/asset.py | 56 --------------------- tests/functional/pipelines/fallback_test.py | 15 ------ 2 files changed, 71 deletions(-) delete mode 100644 tests/functional/pipelines/fallback_test.py diff --git a/aixplain/modules/pipeline/asset.py b/aixplain/modules/pipeline/asset.py index 4c557267..7d6f9112 100644 --- a/aixplain/modules/pipeline/asset.py +++ b/aixplain/modules/pipeline/asset.py @@ -161,35 +161,6 @@ def poll(self, poll_url: Text, name: Text = "pipeline_process") -> Dict: resp = {"status": "FAILED"} return resp - def _should_fallback_to_v2(self, response: Dict, version: str) -> bool: - """Determine if the pipeline should fallback to version 2.0 based on the response. - - Args: - response (Dict): The response from the pipeline call. - version (str): The version of the pipeline being used. - - Returns: - bool: True if fallback is needed, False otherwise. - """ - # If the version is not 3.0, no fallback is needed - if version != self.VERSION_3_0: - return False - - should_fallback = False - if "status" not in response or response["status"] == "FAILED": - should_fallback = True - elif response["status"] == "SUCCESS" and ("data" not in response or not response["data"]): - should_fallback = True - # Check for conditions that require a fallback - - if should_fallback: - logging.warning( - f"Pipeline Run Error: Failed to run pipeline {self.id} with version {version}. " - f"Trying with version {self.VERSION_2_0}." - ) - - return should_fallback - def run( self, data: Union[Text, Dict], @@ -198,7 +169,6 @@ def run( timeout: float = 20000.0, wait_time: float = 1.0, batch_mode: bool = True, - version: str = None, **kwargs, ) -> Dict: """Runs a pipeline call. @@ -215,7 +185,6 @@ def run( Returns: Dict: parsed output from pipeline """ - version = version or self.VERSION_3_0 start = time.time() try: response = self.run_async( @@ -223,7 +192,6 @@ def run( data_asset=data_asset, name=name, batch_mode=batch_mode, - version=version, **kwargs, ) @@ -236,16 +204,6 @@ def run( end = time.time() response = self.__polling(poll_url, name=name, timeout=timeout, wait_time=wait_time) - if self._should_fallback_to_v2(response, version): - return self.run( - data, - data_asset=data_asset, - name=name, - batch_mode=batch_mode, - version=self.VERSION_2_0, - **kwargs, - ) - response["version"] = version return response except Exception as e: error_message = f"Error in request for {name}: {str(e)}" @@ -256,7 +214,6 @@ def run( "status": "FAILED", "error": error_message, "elapsed_time": end - start, - "version": version, } def __prepare_payload( @@ -373,7 +330,6 @@ def run_async( data_asset: Optional[Union[Text, Dict]] = None, name: Text = "pipeline_process", batch_mode: bool = True, - version: str = None, **kwargs, ) -> Dict: """Runs asynchronously a pipeline call. 
@@ -388,7 +344,6 @@ def run_async(
         Returns:
             Dict: polling URL in response
         """
-        version = version or self.VERSION_3_0
         headers = {
             "x-api-key": self.api_key,
             "Content-Type": "application/json",
@@ -396,7 +351,6 @@ def run_async(
 
         payload = self.__prepare_payload(data=data, data_asset=data_asset)
         payload["batchmode"] = batch_mode
-        payload["version"] = version
         payload.update(kwargs)
         payload = json.dumps(payload)
         call_url = f"{self.url}/{self.id}"
@@ -433,16 +387,6 @@ def run_async(
         if resp is not None:
             response["error"] = resp
 
-        if self._should_fallback_to_v2(response, version):
-            return self.run_async(
-                data,
-                data_asset=data_asset,
-                name=name,
-                batch_mode=batch_mode,
-                version=self.VERSION_2_0,
-                **kwargs,
-            )
-        response["version"] = version
         return response
 
     def update(
diff --git a/tests/functional/pipelines/fallback_test.py b/tests/functional/pipelines/fallback_test.py
deleted file mode 100644
index 4650bff3..00000000
--- a/tests/functional/pipelines/fallback_test.py
+++ /dev/null
@@ -1,15 +0,0 @@
-from aixplain.factories import PipelineFactory
-
-
-def test_fallback_to_v2():
-    pipeline = PipelineFactory.get("6750535166d4db27e14f07b1")
-    response = pipeline.run(
-        "https://homepage.ntu.edu.tw/~karchung/miniconversations/mc1.mp3"
-    )
-    assert response["version"] == "3.0"
-    assert response["status"] == "SUCCESS"
-
-    pipeline = PipelineFactory.get("6750535166d4db27e14f07b1")
-    response = pipeline.run("<>")
-    assert response["version"] == "2.0"
-    assert response["status"] == "ERROR"

From cb8c65af87186a04d8e7f1b99fa7c76a8f2f5431 Mon Sep 17 00:00:00 2001
From: Ahmet Gündüz
Date: Thu, 9 Jan 2025 11:44:30 +0300
Subject: [PATCH 101/105] Bug 309 utility model update save without code (#358)

* if code is s3 url it downloads

* added default aws credentials

* added boto3 into reqs

* default value added properly

* If model exists already do not validate previous code

* remove s3 download method and dependencies

* added test methods for validate and model exists
---
 aixplain/modules/model/utility_model.py |  35 +++++--
 aixplain/modules/model/utils.py         |   1 -
 aixplain/utils/file_utils.py            |   2 +-
 tests/unit/utility_test.py              | 127 ++++++++++++++++++++++--
 4 files changed, 150 insertions(+), 15 deletions(-)

diff --git a/aixplain/modules/model/utility_model.py b/aixplain/modules/model/utility_model.py
index b5748ca7..474d31fa 100644
--- a/aixplain/modules/model/utility_model.py
+++ b/aixplain/modules/model/utility_model.py
@@ -115,18 +115,39 @@ def __init__(
         self.output_examples = output_examples
 
     def validate(self):
-        self.code, inputs, description = parse_code(self.code)
+        """Validate the Utility Model."""
+        description = None
+        inputs = []
+        # check if the model exists and if the code is a string starting with s3://
+        # if not, parse the code and update the description and inputs and do the validation
+        # if yes, just do the validation on the description and inputs
+        if not (self._model_exists() and str(self.code).startswith("s3://")):
+            self.code, inputs, description = parse_code(self.code)
+            if self.description is None:
+                self.description = description
+            if len(self.inputs) == 0:
+                self.inputs = inputs
+            for input in self.inputs:
+                input.validate()
+        else:
+            logging.info("Utility Model Already Exists, skipping code validation")
+
+        assert description is not None or self.description is not None, "Utility Model Error: Model description is required"
-        if self.description is None:
-            self.description = description
-        if len(self.inputs) == 0:
-            self.inputs = inputs
-        for input in self.inputs:
-
input.validate() assert self.name and self.name.strip() != "", "Name is required" assert self.description and self.description.strip() != "", "Description is required" assert self.code and self.code.strip() != "", "Code is required" + def _model_exists(self): + if self.id is None or self.id == "": + return False + url = urljoin(self.backend_url, f"sdk/models/{self.id}") + headers = {"Authorization": f"Token {self.api_key}", "Content-Type": "application/json"} + logging.info(f"Start service for GET Model - {url} - {headers}") + r = _request_with_retry("get", url, headers=headers) + if r.status_code != 200: + raise Exception() + return True + def to_dict(self): return { "name": self.name, diff --git a/aixplain/modules/model/utils.py b/aixplain/modules/model/utils.py index f3691928..f2cf6209 100644 --- a/aixplain/modules/model/utils.py +++ b/aixplain/modules/model/utils.py @@ -102,7 +102,6 @@ def parse_code(code: Union[Text, Callable]) -> Tuple[Text, List, Text]: str_code = requests.get(code).text else: str_code = code - # assert str_code has a main function if "def main(" not in str_code: raise Exception("Utility Model Error: Code must have a main function") diff --git a/aixplain/utils/file_utils.py b/aixplain/utils/file_utils.py index 0e617397..d39ca2b9 100644 --- a/aixplain/utils/file_utils.py +++ b/aixplain/utils/file_utils.py @@ -153,7 +153,7 @@ def upload_data( raise Exception("File Uploading Error: Failure on Uploading to S3.") -def s3_to_csv(s3_url: Text, aws_credentials: Dict) -> Text: +def s3_to_csv(s3_url: Text, aws_credentials: Optional[Dict[Text, Text]] = {"AWS_ACCESS_KEY_ID": None, "AWS_SECRET_ACCESS_KEY": None}) -> Text: """Convert s3 url to a csv file and download the file in `download_path` Args: diff --git a/tests/unit/utility_test.py b/tests/unit/utility_test.py index cb45597a..cd901ea0 100644 --- a/tests/unit/utility_test.py +++ b/tests/unit/utility_test.py @@ -95,9 +95,13 @@ def test_update_utility_model(): "utility_model_test", ), ): - mock.put(urljoin(config.BACKEND_URL, "sdk/utilities/123"), json={"id": "123"}) + # Mock both the model existence check and update endpoints + model_id = "123" + mock.get(urljoin(config.BACKEND_URL, f"sdk/models/{model_id}"), status_code=200) + mock.put(urljoin(config.BACKEND_URL, f"sdk/utilities/{model_id}"), json={"id": model_id}) + utility_model = UtilityModel( - id="123", + id=model_id, name="utility_model_test", description="utility_model_test", code="def main(originCode: str)", @@ -111,7 +115,7 @@ def test_update_utility_model(): utility_model.description = "updated_description" utility_model.update() - assert utility_model.id == "123" + assert utility_model.id == model_id assert utility_model.description == "updated_description" def test_save_utility_model(): @@ -126,9 +130,13 @@ def test_save_utility_model(): "utility_model_test", ), ): - mock.put(urljoin(config.BACKEND_URL, "sdk/utilities/123"), json={"id": "123"}) + # Mock both the model existence check and the update endpoint + model_id = "123" + mock.get(urljoin(config.BACKEND_URL, f"sdk/models/{model_id}"), status_code=200) + mock.put(urljoin(config.BACKEND_URL, f"sdk/utilities/{model_id}"), json={"id": model_id}) + utility_model = UtilityModel( - id="123", + id=model_id, name="utility_model_test", description="utility_model_test", code="def main(originCode: str)", @@ -137,6 +145,7 @@ def test_save_utility_model(): function=Function.UTILITIES, api_key=config.TEAM_API_KEY, ) + import warnings # it should not trigger any warning with warnings.catch_warnings(record=True) as w: @@ 
-146,7 +155,7 @@ def test_save_utility_model(): assert len(w) == 0 - assert utility_model.id == "123" + assert utility_model.id == model_id assert utility_model.description == "updated_description" @@ -218,3 +227,109 @@ def main(originCode): with pytest.raises(Exception) as exc_info: parse_code(code) assert str(exc_info.value) == "Utility Model Error: Unsupported input type: list" + +def test_validate_new_model(): + """Test validation for a new model""" + with patch("aixplain.factories.file_factory.FileFactory.to_link", return_value="def main(originCode: str)"): + with patch("aixplain.factories.file_factory.FileFactory.upload", return_value="def main(originCode: str)"): + # Test with valid inputs + utility_model = UtilityModel( + id="", # Empty ID for new model + name="utility_model_test", + description="utility_model_test", + code="def main(originCode: str):\n return originCode", + output_examples="output_description", + function=Function.UTILITIES, + api_key=config.TEAM_API_KEY, + ) + utility_model.validate() # Should not raise any exception + + # Test with empty name + utility_model.name = "" + with pytest.raises(Exception) as exc_info: + utility_model.validate() + assert str(exc_info.value) == "Name is required" + + # Test with empty description + utility_model.name = "utility_model_test" + utility_model.description = "" + with pytest.raises(Exception) as exc_info: + utility_model.validate() + assert str(exc_info.value) == "Description is required" + + # Test with empty code + utility_model.description = "utility_model_test" + utility_model.code = "" + with pytest.raises(Exception) as exc_info: + utility_model.validate() + + assert str(exc_info.value) == "Utility Model Error: Code must have a main function" + +def test_validate_existing_model(): + """Test validation for an existing model with S3 code""" + with requests_mock.Mocker() as mock: + model_id = "123" + # Mock the model existence check + url = urljoin(config.BACKEND_URL, f"sdk/models/{model_id}") + mock.get(url, status_code=200) + + utility_model = UtilityModel( + id=model_id, + name="utility_model_test", + description="utility_model_test", + code="s3://bucket/path/to/code", + output_examples="output_description", + function=Function.UTILITIES, + api_key=config.TEAM_API_KEY, + ) + utility_model.validate() # Should not raise any exception + +def test_model_exists_success(): + """Test _model_exists when model exists""" + with requests_mock.Mocker() as mock: + model_id = "123" + url = urljoin(config.BACKEND_URL, f"sdk/models/{model_id}") + mock.get(url, status_code=200) + + utility_model = UtilityModel( + id=model_id, + name="utility_model_test", + description="utility_model_test", + code="def main(originCode: str)", + output_examples="output_description", + function=Function.UTILITIES, + api_key=config.TEAM_API_KEY, + ) + assert utility_model._model_exists() is True + +def test_model_exists_failure(): + """Test _model_exists when model doesn't exist""" + with requests_mock.Mocker() as mock: + model_id = "123" + url = urljoin(config.BACKEND_URL, f"sdk/models/{model_id}") + mock.get(url, status_code=404) + + utility_model = UtilityModel( + id=model_id, + name="utility_model_test", + description="utility_model_test", + code="def main(originCode: str)", + output_examples="output_description", + function=Function.UTILITIES, + api_key=config.TEAM_API_KEY, + ) + with pytest.raises(Exception): + utility_model._model_exists() + +def test_model_exists_empty_id(): + """Test _model_exists with empty ID""" + utility_model = UtilityModel( + 
id="", # Empty ID + name="utility_model_test", + description="utility_model_test", + code="def main(originCode: str)", + output_examples="output_description", + function=Function.UTILITIES, + api_key=config.TEAM_API_KEY, + ) + assert utility_model._model_exists() is False From 336a0f3feefcea493c8eaad57a1d4528e0ef5c4d Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Mon, 13 Jan 2025 10:37:58 -0300 Subject: [PATCH 102/105] ENG-1393: Index improvements (#360) * Index improvements * Remove add/update methods --- aixplain/modules/model/index_model.py | 18 +++--------------- .../model/{document_index.py => record.py} | 2 +- tests/functional/model/run_model_test.py | 4 ++-- tests/unit/index_model_test.py | 14 +++++++------- 4 files changed, 13 insertions(+), 25 deletions(-) rename aixplain/modules/model/{document_index.py => record.py} (96%) diff --git a/aixplain/modules/model/index_model.py b/aixplain/modules/model/index_model.py index 67b3f8f7..0dcc7b4c 100644 --- a/aixplain/modules/model/index_model.py +++ b/aixplain/modules/model/index_model.py @@ -3,7 +3,7 @@ from aixplain.utils import config from aixplain.modules.model.response import ModelResponse from typing import Text, Optional, Union, Dict -from aixplain.modules.model.document_index import DocumentIndex +from aixplain.modules.model.record import Record from typing import List @@ -55,26 +55,14 @@ def search(self, query: str, top_k: int = 10) -> ModelResponse: data = {"action": "search", "data": query, "payload": {"filters": {}, "top_k": top_k}} return self.run(data=data) - def add(self, documents: List[DocumentIndex]) -> ModelResponse: + def upsert(self, documents: List[Record]) -> ModelResponse: payloads = [doc.to_dict() for doc in documents] data = {"action": "ingest", "data": "", "payload": {"payloads": payloads}} response = self.run(data=data) if response.status == ResponseStatus.SUCCESS: response.data = payloads return response - raise Exception(f"Failed to add documents: {response.error_message}") - - def update(self, documents: List[DocumentIndex]) -> ModelResponse: - payloads = [ - {"value": doc.value, "value_type": doc.value_type, "id": str(doc.id), "uri": doc.uri, "attributes": doc.attributes} - for doc in documents - ] - data = {"action": "update", "data": "", "payload": {"payloads": payloads}} - response = self.run(data=data) - if response.status == ResponseStatus.SUCCESS: - response.data = payloads - return response - raise Exception(f"Failed to update documents: {response.error_message}") + raise Exception(f"Failed to upsert documents: {response.error_message}") def count(self) -> float: data = {"action": "count", "data": ""} diff --git a/aixplain/modules/model/document_index.py b/aixplain/modules/model/record.py similarity index 96% rename from aixplain/modules/model/document_index.py rename to aixplain/modules/model/record.py index 12562931..a3c57173 100644 --- a/aixplain/modules/model/document_index.py +++ b/aixplain/modules/model/record.py @@ -2,7 +2,7 @@ from uuid import uuid4 -class DocumentIndex: +class Record: def __init__(self, value: str, value_type: str = "text", id: Optional[str] = None, uri: str = "", attributes: dict = {}): self.value = value self.value_type = value_type diff --git a/tests/functional/model/run_model_test.py b/tests/functional/model/run_model_test.py index 04e5da0d..16239726 100644 --- a/tests/functional/model/run_model_test.py +++ b/tests/functional/model/run_model_test.py @@ -55,11 +55,11 @@ def test_run_async(): def 
test_index_model(): - from aixplain.modules.model.document_index import DocumentIndex + from aixplain.modules.model.record import Record from aixplain.factories import IndexFactory index_model = IndexFactory.create("test", "test") - index_model.add([DocumentIndex(value="Hello, world!", value_type="text", uri="", attributes={})]) + index_model.upsert([Record(value="Hello, world!", value_type="text", uri="", attributes={})]) response = index_model.search("Hello") assert str(response.status) == "SUCCESS" assert index_model.count() == 1 diff --git a/tests/unit/index_model_test.py b/tests/unit/index_model_test.py index be9acc6f..dbf698cc 100644 --- a/tests/unit/index_model_test.py +++ b/tests/unit/index_model_test.py @@ -1,6 +1,6 @@ import requests_mock from aixplain.enums import Function, ResponseStatus -from aixplain.modules.model.document_index import DocumentIndex +from aixplain.modules.model.record import Record from aixplain.modules.model.response import ModelResponse from aixplain.modules.model.index_model import IndexModel from aixplain.utils import config @@ -28,8 +28,8 @@ def test_add_success(): mock_response = {"status": "SUCCESS"} mock_documents = [ - DocumentIndex(value="Sample document content 1", value_type="text", id=0, uri="", attributes={}), - DocumentIndex(value="Sample document content 2", value_type="text", id=1, uri="", attributes={}), + Record(value="Sample document content 1", value_type="text", id=0, uri="", attributes={}), + Record(value="Sample document content 2", value_type="text", id=1, uri="", attributes={}), ] with requests_mock.Mocker() as mock: @@ -37,7 +37,7 @@ def test_add_success(): index_model = IndexModel(id=index_id, data=data, name="name", function=Function.SEARCH) - response = index_model.add(mock_documents) + response = index_model.upsert(mock_documents) assert isinstance(response, ModelResponse) assert response.status == ResponseStatus.SUCCESS @@ -47,8 +47,8 @@ def test_update_success(): mock_response = {"status": "SUCCESS"} mock_documents = [ - DocumentIndex(value="Updated document content 1", value_type="text", id=0, uri="", attributes={}), - DocumentIndex(value="Updated document content 2", value_type="text", id=1, uri="", attributes={}), + Record(value="Updated document content 1", value_type="text", id=0, uri="", attributes={}), + Record(value="Updated document content 2", value_type="text", id=1, uri="", attributes={}), ] with requests_mock.Mocker() as mock: @@ -57,7 +57,7 @@ def test_update_success(): index_model = IndexModel(id=index_id, data=data, name="name", function=Function.SEARCH) - response = index_model.update(mock_documents) + response = index_model.upsert(mock_documents) assert isinstance(response, ModelResponse) assert response.status == ResponseStatus.SUCCESS From e16020ba25931d31c9765dd35fadf4508e856ef0 Mon Sep 17 00:00:00 2001 From: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Date: Mon, 13 Jan 2025 15:17:05 -0300 Subject: [PATCH 103/105] BUG-329: Fix agent validation when using api key (#361) * Setting tool api key * Add api key field to tools --------- Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> --- aixplain/modules/agent/__init__.py | 17 ++--- aixplain/modules/agent/tool/__init__.py | 4 ++ aixplain/modules/agent/tool/model_tool.py | 2 +- aixplain/modules/agent/tool/pipeline_tool.py | 2 +- tests/unit/agent_test.py | 75 +++++++++++++++++++- 5 files changed, 87 insertions(+), 13 deletions(-) diff --git a/aixplain/modules/agent/__init__.py 
b/aixplain/modules/agent/__init__.py index 581c7e88..5ff9ff69 100644 --- a/aixplain/modules/agent/__init__.py +++ b/aixplain/modules/agent/__init__.py @@ -92,6 +92,8 @@ def __init__( super().__init__(id, name, description, api_key, supplier, version, cost=cost) self.additional_info = additional_info self.tools = tools + for i, _ in enumerate(tools): + self.tools[i].api_key = api_key self.llm_id = llm_id if isinstance(status, str): try: @@ -110,7 +112,7 @@ def validate(self) -> None: ), "Agent Creation Error: Agent name must not contain special characters." try: - llm = ModelFactory.get(self.llm_id) + llm = ModelFactory.get(self.llm_id, api_key=self.api_key) assert llm.function == Function.TEXT_GENERATION, "Large Language Model must be a text generation model." except Exception: raise Exception(f"Large Language Model with ID '{self.llm_id}' not found.") @@ -307,19 +309,19 @@ def delete(self) -> None: message = f"Agent Deletion Error (HTTP {r.status_code}): There was an error in deleting the agent." logging.error(message) raise Exception(f"{message}") - + def update(self) -> None: """Update agent.""" import warnings import inspect + # Get the current call stack stack = inspect.stack() - if len(stack) > 2 and stack[1].function != 'save': + if len(stack) > 2 and stack[1].function != "save": warnings.warn( - "update() is deprecated and will be removed in a future version. " - "Please use save() instead.", + "update() is deprecated and will be removed in a future version. " "Please use save() instead.", DeprecationWarning, - stacklevel=2 + stacklevel=2, ) from aixplain.factories.agent_factory.utils import build_agent @@ -343,10 +345,9 @@ def update(self) -> None: error_msg = f"Agent Update Error (HTTP {r.status_code}): {resp}" raise Exception(error_msg) - def save(self) -> None: """Save the Agent.""" - self.update() + self.update() def deploy(self) -> None: assert self.status == AssetStatus.DRAFT, "Agent must be in draft status to be deployed." diff --git a/aixplain/modules/agent/tool/__init__.py b/aixplain/modules/agent/tool/__init__.py index 01b44dfa..aefa093a 100644 --- a/aixplain/modules/agent/tool/__init__.py +++ b/aixplain/modules/agent/tool/__init__.py @@ -22,6 +22,7 @@ """ from abc import ABC from typing import Optional, Text +from aixplain.utils import config class Tool(ABC): @@ -38,6 +39,7 @@ def __init__( name: Text, description: Text, version: Optional[Text] = None, + api_key: Optional[Text] = config.TEAM_API_KEY, **additional_info, ) -> None: """Specialized software or resource designed to assist the AI in executing specific tasks or functions based on user commands. @@ -46,10 +48,12 @@ def __init__( name (Text): name of the tool description (Text): descriptiion of the tool version (Text): version of the tool + api_key (Text): api key of the tool. Defaults to config.TEAM_API_KEY. """ self.name = name self.description = description self.version = version + self.api_key = api_key self.additional_info = additional_info def to_dict(self): diff --git a/aixplain/modules/agent/tool/model_tool.py b/aixplain/modules/agent/tool/model_tool.py index 0b1c3179..bdbe0f5f 100644 --- a/aixplain/modules/agent/tool/model_tool.py +++ b/aixplain/modules/agent/tool/model_tool.py @@ -108,7 +108,7 @@ def validate(self) -> Model: try: model = None if self.model is not None: - model = ModelFactory.get(self.model) + model = ModelFactory.get(self.model, api_key=self.api_key) return model except Exception: raise Exception(f"Model Tool Unavailable. 
Make sure Model '{self.model}' exists or you have access to it.") diff --git a/aixplain/modules/agent/tool/pipeline_tool.py b/aixplain/modules/agent/tool/pipeline_tool.py index 9ea7a5fb..ab3b4311 100644 --- a/aixplain/modules/agent/tool/pipeline_tool.py +++ b/aixplain/modules/agent/tool/pipeline_tool.py @@ -62,6 +62,6 @@ def validate(self): from aixplain.factories.pipeline_factory import PipelineFactory try: - PipelineFactory.get(self.pipeline) + PipelineFactory.get(self.pipeline, api_key=self.api_key) except Exception: raise Exception(f"Pipeline Tool Unavailable. Make sure Pipeline '{self.pipeline}' exists or you have access to it.") diff --git a/tests/unit/agent_test.py b/tests/unit/agent_test.py index 6c17a5b6..10997a75 100644 --- a/tests/unit/agent_test.py +++ b/tests/unit/agent_test.py @@ -9,7 +9,6 @@ from aixplain.modules.agent.utils import process_variables from urllib.parse import urljoin from unittest.mock import patch -import warnings from aixplain.enums.function import Function @@ -198,6 +197,8 @@ def test_to_dict(): description="Test Agent Description", llm_id="6646261c6eb563165658bbb1", tools=[AgentFactory.create_model_tool(function="text-generation")], + api_key="test_api_key", + status=AssetStatus.DRAFT, ) agent_json = agent.to_dict() @@ -207,6 +208,7 @@ def test_to_dict(): assert agent_json["llmId"] == "6646261c6eb563165658bbb1" assert agent_json["assets"][0]["function"] == "text-generation" assert agent_json["assets"][0]["type"] == "model" + assert agent_json["status"] == "draft" def test_update_success(): @@ -256,7 +258,10 @@ def test_update_success(): mock.get(url, headers=headers, json=model_ref_response) # Capture warnings - with pytest.warns(DeprecationWarning, match="update\(\) is deprecated and will be removed in a future version. Please use save\(\) instead."): + with pytest.warns( + DeprecationWarning, + match="update\(\) is deprecated and will be removed in a future version. Please use save\(\) instead.", + ): agent.update() assert agent.id == ref_response["id"] @@ -265,6 +270,7 @@ def test_update_success(): assert agent.llm_id == ref_response["llmId"] assert agent.tools[0].function.value == ref_response["assets"][0]["function"] + def test_save_success(): agent = Agent( id="123", @@ -310,8 +316,9 @@ def test_save_success(): "pricing": {"currency": "USD", "value": 0.0}, } mock.get(url, headers=headers, json=model_ref_response) - + import warnings + # Capture warnings with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") # Trigger all warnings @@ -328,6 +335,7 @@ def test_save_success(): assert agent.llm_id == ref_response["llmId"] assert agent.tools[0].function.value == ref_response["assets"][0]["function"] + def test_run_success(): agent = Agent("123", "Test Agent", "Sample Description") url = urljoin(config.BACKEND_URL, f"sdk/agents/{agent.id}/run") @@ -369,3 +377,64 @@ def test_fail_utilities_without_model(): with pytest.raises(Exception) as exc_info: AgentFactory.create(name="Test", tools=[ModelTool(function=Function.UTILITIES)], llm_id="6646261c6eb563165658bbb1") assert str(exc_info.value) == "Agent Creation Error: Utility function must be used with an associated model." 
+ + +def test_agent_api_key_propagation(): + """Test that the api_key is properly propagated to tools when creating an agent""" + custom_api_key = "custom_test_key" + tool = AgentFactory.create_model_tool(function="text-generation") + agent = Agent(id="123", name="Test Agent", description="Test Description", tools=[tool], api_key=custom_api_key) + + # Check that the agent has the correct api_key + assert agent.api_key == custom_api_key + # Check that the tool received the agent's api_key + assert agent.tools[0].api_key == custom_api_key + + +def test_agent_default_api_key(): + """Test that the default api_key is used when none is provided""" + tool = AgentFactory.create_model_tool(function="text-generation") + agent = Agent(id="123", name="Test Agent", description="Test Description", tools=[tool]) + + # Check that the agent has the default api_key + assert agent.api_key == config.TEAM_API_KEY + # Check that the tool has the default api_key + assert agent.tools[0].api_key == config.TEAM_API_KEY + + +def test_agent_multiple_tools_api_key(): + """Test that api_key is properly propagated to multiple tools""" + custom_api_key = "custom_test_key" + tools = [ + AgentFactory.create_model_tool(function="text-generation"), + AgentFactory.create_python_interpreter_tool(), + AgentFactory.create_custom_python_code_tool( + code="def main(query: str) -> str:\n return 'Hello'", description="Test Tool" + ), + ] + + agent = Agent(id="123", name="Test Agent", description="Test Description", tools=tools, api_key=custom_api_key) + + # Check that all tools received the agent's api_key + for tool in agent.tools: + assert tool.api_key == custom_api_key + + +def test_agent_api_key_in_requests(): + """Test that the api_key is properly used in API requests""" + custom_api_key = "custom_test_key" + agent = Agent(id="123", name="Test Agent", description="Test Description", api_key=custom_api_key) + + with requests_mock.Mocker() as mock: + url = agent.url + # The custom api_key should be used in the headers + headers = {"x-api-key": custom_api_key, "Content-Type": "application/json"} + ref_response = {"data": "test_url", "status": "IN_PROGRESS"} + mock.post(url, headers=headers, json=ref_response) + + response = agent.run_async(data={"query": "Test query"}) + + # Verify that the request was made with the correct api_key + assert mock.last_request.headers["x-api-key"] == custom_api_key + assert response["status"] == "IN_PROGRESS" + assert response["url"] == "test_url" From cf3dad0327bd7f2a44c7dd0a914b6ab86031fff3 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Tue, 14 Jan 2025 10:37:29 -0300 Subject: [PATCH 104/105] Get error in other field (#359) --- aixplain/modules/model/response.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/aixplain/modules/model/response.py b/aixplain/modules/model/response.py index 1576c1f4..9cbbe4d8 100644 --- a/aixplain/modules/model/response.py +++ b/aixplain/modules/model/response.py @@ -22,6 +22,10 @@ def __init__( self.data = data self.details = details self.completed = completed + if error_message == "": + error_message = kwargs.get("error", "") + if "supplierError" in kwargs: + error_message = f"{error_message} - {kwargs.get('supplierError', '')}" self.error_message = error_message self.used_credits = used_credits self.run_time = run_time From 62a904c513e180561a757dbe044b75fb8852de85 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Tue, 14 Jan 2025 
10:38:46 -0300 Subject: [PATCH 105/105] ENG-1393: index improvements (#363) * Index improvements * Remove add/update methods * Adding filter options to search * Functional tests for upsert as update --- aixplain/modules/model/index_model.py | 4 ++-- tests/functional/model/run_model_test.py | 11 +++++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/aixplain/modules/model/index_model.py b/aixplain/modules/model/index_model.py index 0dcc7b4c..fae597b8 100644 --- a/aixplain/modules/model/index_model.py +++ b/aixplain/modules/model/index_model.py @@ -51,8 +51,8 @@ def __init__( self.url = config.MODELS_RUN_URL self.backend_url = config.BACKEND_URL - def search(self, query: str, top_k: int = 10) -> ModelResponse: - data = {"action": "search", "data": query, "payload": {"filters": {}, "top_k": top_k}} + def search(self, query: str, top_k: int = 10, filters: Dict = {}) -> ModelResponse: + data = {"action": "search", "data": query, "payload": {"filters": filters, "top_k": top_k}} return self.run(data=data) def upsert(self, documents: List[Record]) -> ModelResponse: diff --git a/tests/functional/model/run_model_test.py b/tests/functional/model/run_model_test.py index 16239726..47bd4f12 100644 --- a/tests/functional/model/run_model_test.py +++ b/tests/functional/model/run_model_test.py @@ -55,12 +55,19 @@ def test_run_async(): def test_index_model(): + from uuid import uuid4 from aixplain.modules.model.record import Record from aixplain.factories import IndexFactory - index_model = IndexFactory.create("test", "test") - index_model.upsert([Record(value="Hello, world!", value_type="text", uri="", attributes={})]) + index_model = IndexFactory.create(name=str(uuid4()), description=str(uuid4())) + index_model.upsert([Record(value="Hello, world!", value_type="text", uri="", id="1", attributes={})]) response = index_model.search("Hello") assert str(response.status) == "SUCCESS" + assert "world" in response.data.lower() + assert index_model.count() == 1 + index_model.upsert([Record(value="Hello, aiXplain!", value_type="text", uri="", id="1", attributes={})]) + response = index_model.search("aiXplain") + assert str(response.status) == "SUCCESS" + assert "aixplain" in response.data.lower() assert index_model.count() == 1 index_model.delete()
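
Taken together, PATCH 102 and PATCH 105 settle the index API on Record, upsert, search (now with an optional filters dict), count, and delete. The sketch below is a usage recap distilled from the functional test above; it is not itself part of the patch series, the index name and description strings are placeholders, and passing filters={} is assumed to behave like the default (no filtering):

    from aixplain.factories import IndexFactory
    from aixplain.modules.model.record import Record

    # Create an index collection; name/description here are placeholder values.
    index_model = IndexFactory.create(name="demo-index", description="demo index")

    # upsert() both inserts and updates: re-sending id="1" overwrites the
    # first record instead of adding a second one, so count() stays at 1.
    index_model.upsert([Record(value="Hello, world!", value_type="text", uri="", id="1", attributes={})])
    index_model.upsert([Record(value="Hello, aiXplain!", value_type="text", uri="", id="1", attributes={})])

    # search() takes top_k and, after PATCH 105, a filters dict ({} = no filtering).
    response = index_model.search("aiXplain", top_k=10, filters={})
    assert str(response.status) == "SUCCESS"
    assert index_model.count() == 1

    index_model.delete()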