diff --git a/aixplain/enums/asset_status.py b/aixplain/enums/asset_status.py
new file mode 100644
index 00000000..134af26e
--- /dev/null
+++ b/aixplain/enums/asset_status.py
@@ -0,0 +1,43 @@
+__author__ = "thiagocastroferreira"
+
+"""
+Copyright 2024 The aiXplain SDK authors
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Author: Duraikrishna Selvaraju, Thiago Castro Ferreira, Shreyas Sharma and Lucas Pavanelli
+Date: February 21st 2024
+Description:
+    Asset Status Enum
+"""
+
+from enum import Enum
+from typing import Text
+
+class AssetStatus(Text, Enum):
+    HIDDEN = 'hidden'
+    SCHEDULED = 'scheduled'
+    ONBOARDING = 'onboarding'
+    ONBOARDED = 'onboarded'
+    PENDING = 'pending'
+    FAILED = 'failed'
+    TRAINING = 'training'
+    REJECTED = 'rejected'
+    ENABLING = 'enabling'
+    DELETING = 'deleting'
+    DISABLED = 'disabled'
+    DELETED = 'deleted'
+    IN_PROGRESS = 'in_progress'
+    COMPLETED = 'completed'
+    CANCELING = 'canceling'
+    CANCELED = 'canceled'
\ No newline at end of file
diff --git a/aixplain/factories/benchmark_factory.py b/aixplain/factories/benchmark_factory.py
index 88d2411b..57d4a833 100644
--- a/aixplain/factories/benchmark_factory.py
+++ b/aixplain/factories/benchmark_factory.py
@@ -26,6 +26,7 @@
 import json
 import pandas as pd
 from pathlib import Path
+from aixplain.enums.supplier import Supplier
 from aixplain.modules import Dataset, Metric, Model
 from aixplain.modules.benchmark_job import BenchmarkJob
 from aixplain.modules.benchmark import Benchmark
@@ -237,3 +238,24 @@ def list_normalization_options(cls, metric: Metric, model: Model) -> List[str]:
             error_message = f"Listing Normalization Options: Error in getting Normalization Options: {e}"
             logging.error(error_message, exc_info=True)
             return []
+
+    @classmethod
+    def get_benchmark_job_scores(cls, job_id):
+        def __get_model_name(model_id):
+            model = ModelFactory.get(model_id)
+            supplier = str(model.supplier)
+            try:
+                if isinstance(supplier, Supplier):
+                    name = f"{supplier.name}"
+                else:
+                    name = f"{eval(supplier)['name']}"
+            except Exception as e:
+                logging.error(f"{e}")
+                name = f"{supplier}"
+            if model.version is not None:
+                name = f"{name}({model.version})"
+            return name
+        benchmark_job = cls.get_job(job_id)
+        scores_df = benchmark_job.get_scores()
+        scores_df["Model"] = scores_df["Model"].apply(lambda x: __get_model_name(x))
+        return scores_df
\ No newline at end of file
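The new `get_benchmark_job_scores` helper is easiest to see end to end. A minimal usage sketch, assuming a configured `TEAM_API_KEY`; the job ID below is a placeholder, not a real asset:

```python
from aixplain.factories.benchmark_factory import BenchmarkFactory

job_id = "64e615671567f848804391e1"  # placeholder benchmark job ID

# One row per benchmarked model; the "Model" column holds readable
# supplier(version) names instead of raw model IDs, and the remaining
# columns hold the average score per metric.
scores_df = BenchmarkFactory.get_benchmark_job_scores(job_id)
print(scores_df.head())
```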
diff --git a/aixplain/factories/pipeline_factory.py b/aixplain/factories/pipeline_factory.py
index 078bcae6..404a5556 100644
--- a/aixplain/factories/pipeline_factory.py
+++ b/aixplain/factories/pipeline_factory.py
@@ -22,6 +22,7 @@
 """
 import json
 import logging
+import os
 from typing import Dict, List, Optional, Text, Union
 from aixplain.enums.data_type import DataType
 from aixplain.enums.function import Function
@@ -207,7 +208,7 @@ def list(
                 output_data_types = [output_data_types]
             payload["inputDataTypes"] = [data_type.value for data_type in output_data_types]
 
-        logging.info(f"Start service for POST List Dataset - {url} - {headers} - {json.dumps(payload)}")
+        logging.info(f"Start service for POST List Pipeline - {url} - {headers} - {json.dumps(payload)}")
         r = _request_with_retry("post", url, headers=headers, json=payload)
         resp = r.json()
@@ -220,3 +221,40 @@ def list(
         for pipeline in results:
             pipelines.append(cls.__from_response(pipeline))
         return {"results": pipelines, "page_total": page_total, "page_number": page_number, "total": total}
+
+    @classmethod
+    def create(cls, name: Text, pipeline: Union[Text, Dict], status: Text = "draft") -> Pipeline:
+        """Pipeline Creation
+
+        Args:
+            name (Text): Pipeline Name
+            pipeline (Union[Text, Dict]): Pipeline as a Python dictionary or in a JSON file
+            status (Text, optional): Status of the pipeline. Currently only draft pipelines can be saved. Defaults to "draft".
+
+        Raises:
+            Exception: Currently only the creation of draft pipelines is supported.
+
+        Returns:
+            Pipeline: instance of the new pipeline
+        """
+        try:
+            assert status == "draft", "Pipeline Creation Error: Currently only the creation of draft pipelines is supported."
+            if isinstance(pipeline, str):
+                _, ext = os.path.splitext(pipeline)
+                assert (
+                    os.path.exists(pipeline) and ext == ".json"
+                ), "Pipeline Creation Error: Make sure the pipeline to be saved is in a JSON file."
+                with open(pipeline) as f:
+                    pipeline = json.load(f)
+
+            # prepare payload
+            payload = {"name": name, "status": "draft", "architecture": pipeline}
+            url = urljoin(cls.backend_url, "sdk/pipelines")
+            headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"}
+            logging.info(f"Start service for POST Create Pipeline - {url} - {headers} - {json.dumps(payload)}")
+            r = _request_with_retry("post", url, headers=headers, json=payload)
+            response = r.json()
+
+            return Pipeline(response["id"], name, config.TEAM_API_KEY)
+        except Exception as e:
+            raise Exception(e)
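For reference, a short sketch of how `PipelineFactory.create` accepts either input form; the file path and the empty architecture are illustrative placeholders, and a valid `TEAM_API_KEY` is assumed:

```python
from aixplain.factories.pipeline_factory import PipelineFactory

# From a JSON file on disk (illustrative path)...
pipeline = PipelineFactory.create(name="My Draft Pipeline", pipeline="path/to/pipeline.json")

# ...or from an in-memory dictionary with the expected "nodes"/"links" layout.
architecture = {"nodes": [], "links": []}
pipeline = PipelineFactory.create(name="My Draft Pipeline", pipeline=architecture)

print(pipeline.id)  # ID assigned by the backend to the new draft
```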
diff --git a/aixplain/modules/__init__.py b/aixplain/modules/__init__.py
index 0902eaf4..bb9e696b 100644
--- a/aixplain/modules/__init__.py
+++ b/aixplain/modules/__init__.py
@@ -29,5 +29,6 @@
 from .model import Model
 from .pipeline import Pipeline
 from .finetune import Finetune, FinetuneCost
+from .finetune.status import FinetuneStatus
 from .benchmark import Benchmark
 from .benchmark_job import BenchmarkJob
diff --git a/aixplain/modules/benchmark_job.py b/aixplain/modules/benchmark_job.py
index 8531127a..7dae2d96 100644
--- a/aixplain/modules/benchmark_job.py
+++ b/aixplain/modules/benchmark_job.py
@@ -92,3 +92,125 @@ def download_results_as_csv(self, save_path: Optional[Text] = None, return_dataf
             error_message = f"Downloading Benchmark Results: Error in Downloading Benchmark Results : {e}"
             logging.error(error_message, exc_info=True)
             raise Exception(error_message)
+
+    def __simplify_scores(self, scores):
+        simplified_score_list = []
+        for model_id, model_info in scores.items():
+            model_scores = model_info["rawScores"]
+            row = {"Model": model_id}
+            for score_info in model_scores:
+                row[score_info["longName"]] = score_info["average"]
+            simplified_score_list.append(row)
+        return simplified_score_list
+
+    def get_scores(self, return_simplified=True, return_as_dataframe=True):
+        try:
+            resp = self._fetch_current_response(self.id)
+            iterations = resp.get("iterations", [])
+            scores = {}
+            for iteration_info in iterations:
+                model_id = iteration_info["pipeline"]
+                model_info = {
+                    "creditsUsed": round(iteration_info.get("credits", 0), 5),
+                    "timeSpent": round(iteration_info.get("runtime", 0), 2),
+                    "status": iteration_info["status"],
+                    "rawScores": iteration_info["scores"],
+                }
+                scores[model_id] = model_info
+
+            if return_simplified:
+                simplified_scores = self.__simplify_scores(scores)
+                if return_as_dataframe:
+                    simplified_scores = pd.DataFrame(simplified_scores)
+                return simplified_scores
+            else:
+                return scores
+        except Exception as e:
+            error_message = f"Benchmark scores: Error in Getting benchmark scores: {e}"
+            logging.error(error_message, exc_info=True)
+            raise Exception(error_message)
+
+    def get_failure_rate(self, return_as_dataframe=True):
+        try:
+            scores = self.get_scores(return_simplified=False)
+            failure_rates = {}
+            for model_id, model_info in scores.items():
+                if len(model_info["rawScores"]) == 0:
+                    failure_rates[model_id] = 0
+                    continue
+                score_info = model_info["rawScores"][0]
+                num_successful = score_info["count"]
+                num_failed = score_info["failedSegmentsCount"]
+                failure_rate = (num_failed * 100) / (num_successful + num_failed)
+                failure_rates[model_id] = failure_rate
+            if return_as_dataframe:
+                df = pd.DataFrame()
+                df["Model"] = list(failure_rates.keys())
+                df["Failure Rate"] = list(failure_rates.values())
+                return df
+            else:
+                return failure_rates
+        except Exception as e:
+            error_message = f"Benchmark scores: Error in Getting benchmark failure rate: {e}"
+            logging.error(error_message, exc_info=True)
+            raise Exception(error_message)
+
+    def get_all_explanations(self):
+        try:
+            resp = self._fetch_current_response(self.id)
+            raw_explanations = resp.get("explanation", {})
+            if "metricInDependent" not in raw_explanations:
+                raw_explanations["metricInDependent"] = []
+            if "metricDependent" not in raw_explanations:
+                raw_explanations["metricDependent"] = []
+            return raw_explanations
+        except Exception as e:
+            error_message = f"Benchmark scores: Error in Getting benchmark explanations: {e}"
+            logging.error(error_message, exc_info=True)
+            raise Exception(error_message)
+
+    def get_localized_explanations(self, metric_dependant: bool, group_by_task: bool = False):
+        try:
+            raw_explanations = self.get_all_explanations()
+            if metric_dependant:
+                localized_explanations = raw_explanations["metricDependent"]
+                if len(localized_explanations) == 0:
+                    localized_explanations = {}
+                else:
+                    grouped_explanations = {}
+                    task_list = []
+                    first_explanation = localized_explanations[0]
+                    for task in first_explanation:
+                        if task not in ["scoreId", "datasetId"]:
+                            task_list.append(task)
+
+                    if group_by_task:
+                        for task in task_list:
+                            task_explanation = {}
+                            for explanation_item in localized_explanations:
+                                item_task_explanation = explanation_item[task]
+                                identifier = explanation_item["scoreId"]
+                                task_explanation[identifier] = item_task_explanation
+                            grouped_explanations[task] = task_explanation
+                    else:
+                        for explanation_item in localized_explanations:
+                            identifier = explanation_item["scoreId"]
+                            grouped_explanations[identifier] = explanation_item
+                    localized_explanations = grouped_explanations
+            else:
+                localized_explanations = raw_explanations["metricInDependent"]
+                if len(localized_explanations) == 0:
+                    localized_explanations = {}
+                else:
+                    localized_explanations = localized_explanations[0]
+            return localized_explanations
+
+        except Exception as e:
+            error_message = f"Benchmark scores: Error in Getting benchmark explanations: {e}"
+            logging.error(error_message, exc_info=True)
+            raise Exception(error_message)
\ No newline at end of file
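A brief sketch of the other new `BenchmarkJob` accessors; the job ID is a placeholder and the exact shape of the explanation payload depends on the backend response:

```python
from aixplain.factories.benchmark_factory import BenchmarkFactory

job = BenchmarkFactory.get_job("64e615671567f848804391e1")  # placeholder job ID

# Percentage of segments that failed for each model in the job.
failure_df = job.get_failure_rate()

# Explanations keyed by scoreId; pass group_by_task=True to pivot them by task.
explanations = job.get_localized_explanations(metric_dependant=True)
```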
"thiagocastroferreira" + +""" +Copyright 2024 The aiXplain SDK authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Author: Duraikrishna Selvaraju, Thiago Castro Ferreira, Shreyas Sharma and Lucas Pavanelli +Date: February 21st 2024 +Description: + FinetuneCost Class +""" + +from aixplain.enums.asset_status import AssetStatus +from dataclasses import dataclass +from dataclasses_json import dataclass_json +from typing import Optional, Text + +@dataclass_json +@dataclass +class FinetuneStatus(object): + status: "AssetStatus" + model_status: "AssetStatus" + epoch: Optional[float] = None + training_loss: Optional[float] = None + validation_loss: Optional[float] = None diff --git a/aixplain/modules/metric.py b/aixplain/modules/metric.py index 8d8844f0..04a0bdd7 100644 --- a/aixplain/modules/metric.py +++ b/aixplain/modules/metric.py @@ -23,6 +23,7 @@ from typing import Optional, Text, List, Union from aixplain.modules.asset import Asset + from aixplain.utils.file_utils import _request_with_retry from aixplain.factories.model_factory import ModelFactory @@ -92,6 +93,7 @@ def run(self, hypothesis: Optional[Union[str, List[str]]]=None, source: Optional source (Optional[Union[str, List[str]]], optional): Can give a single source or a list of sources for metric calculation. Defaults to None. reference (Optional[Union[str, List[str]]], optional): Can give a single reference or a list of references for metric calculation. Defaults to None. """ + from aixplain.factories.model_factory import ModelFactory model = ModelFactory.get(self.id) payload = { "function": self.function, diff --git a/aixplain/modules/model.py b/aixplain/modules/model.py index 0804af29..fc3a82cd 100644 --- a/aixplain/modules/model.py +++ b/aixplain/modules/model.py @@ -20,7 +20,6 @@ Description: Model Class """ - import time import json import logging @@ -251,23 +250,65 @@ def run_async(self, data: Union[Text, Dict], name: Text = "model_process", param response["error"] = msg return response - def check_finetune_status(self): + def check_finetune_status(self, after_epoch: Optional[int] = None): """Check the status of the FineTune model. + Args: + after_epoch (Optional[int], optional): status after a given epoch. Defaults to None. + Raises: Exception: If the 'TEAM_API_KEY' is not provided. Returns: - str: The status of the FineTune model. + FinetuneStatus: The status of the FineTune model. 
""" + from aixplain.enums.asset_status import AssetStatus + from aixplain.modules.finetune.status import FinetuneStatus headers = {"x-api-key": self.api_key, "Content-Type": "application/json"} + resp = None try: - url = urljoin(self.backend_url, f"sdk/models/{self.id}") + url = urljoin(self.backend_url, f"sdk/finetune/{self.id}/ml-logs") logging.info(f"Start service for GET Check FineTune status Model - {url} - {headers}") r = _request_with_retry("get", url, headers=headers) resp = r.json() - status = resp["status"] - logging.info(f"Response for GET Check FineTune status Model - Id {self.id} / Status {status}.") + finetune_status = AssetStatus(resp["finetuneStatus"]) + model_status = AssetStatus(resp["modelStatus"]) + logs = sorted(resp["logs"], key=lambda x: float(x["epoch"])) + + target_epoch = None + if after_epoch is not None: + logs = [log for log in logs if float(log["epoch"]) > after_epoch] + if len(logs) > 0: + target_epoch = float(logs[0]["epoch"]) + elif len(logs) > 0: + target_epoch = float(logs[-1]["epoch"]) + + if target_epoch is not None: + log = None + for log_ in logs: + if int(log_["epoch"]) == target_epoch: + if log is None: + log = log_ + else: + if log_["trainLoss"] is not None: + log["trainLoss"] = log_["trainLoss"] + if log_["evalLoss"] is not None: + log["evalLoss"] = log_["evalLoss"] + + status = FinetuneStatus( + status=finetune_status, + model_status=model_status, + epoch=float(log["epoch"]) if "epoch" in log and log["epoch"] is not None else None, + training_loss=float(log["trainLoss"]) if "trainLoss" in log and log["trainLoss"] is not None else None, + validation_loss=float(log["evalLoss"]) if "evalLoss" in log and log["evalLoss"] is not None else None, + ) + else: + status = FinetuneStatus( + status=finetune_status, + model_status=model_status, + ) + + logging.info(f"Response for GET Check FineTune status Model - Id {self.id} / Status {status.status.value}.") return status except Exception as e: message = "" diff --git a/aixplain/modules/pipeline.py b/aixplain/modules/pipeline.py index 011f3114..3de49756 100644 --- a/aixplain/modules/pipeline.py +++ b/aixplain/modules/pipeline.py @@ -23,11 +23,13 @@ import time import json +import os import logging from aixplain.modules.asset import Asset from aixplain.utils import config from aixplain.utils.file_utils import _request_with_retry from typing import Dict, Optional, Text, Union +from urllib.parse import urljoin class Pipeline(Asset): @@ -306,3 +308,32 @@ def run_async( if resp is not None: response["error"] = resp return response + + def update(self, pipeline: Union[Text, Dict]): + """Update Pipeline + + Args: + pipeline (Union[Text, Dict]): Pipeline as a Python dictionary or in a JSON file + + Raises: + Exception: Make sure the pipeline to be save is in a JSON file. + """ + try: + if isinstance(pipeline, str) is True: + _, ext = os.path.splitext(pipeline) + assert ( + os.path.exists(pipeline) and ext == ".json" + ), "Pipeline Update Error: Make sure the pipeline to be save is in a JSON file." 
diff --git a/aixplain/modules/pipeline.py b/aixplain/modules/pipeline.py
index 011f3114..3de49756 100644
--- a/aixplain/modules/pipeline.py
+++ b/aixplain/modules/pipeline.py
@@ -23,11 +23,13 @@
 import time
 import json
+import os
 import logging
 from aixplain.modules.asset import Asset
 from aixplain.utils import config
 from aixplain.utils.file_utils import _request_with_retry
 from typing import Dict, Optional, Text, Union
+from urllib.parse import urljoin
 
 
 class Pipeline(Asset):
@@ -306,3 +308,32 @@ def run_async(
         if resp is not None:
             response["error"] = resp
         return response
+
+    def update(self, pipeline: Union[Text, Dict]):
+        """Update Pipeline
+
+        Args:
+            pipeline (Union[Text, Dict]): Pipeline as a Python dictionary or in a JSON file
+
+        Raises:
+            Exception: Make sure the pipeline to be updated is in a JSON file.
+        """
+        try:
+            if isinstance(pipeline, str):
+                _, ext = os.path.splitext(pipeline)
+                assert (
+                    os.path.exists(pipeline) and ext == ".json"
+                ), "Pipeline Update Error: Make sure the pipeline to be updated is in a JSON file."
+                with open(pipeline) as f:
+                    pipeline = json.load(f)
+
+            # prepare payload
+            payload = {"name": self.name, "status": "draft", "architecture": pipeline}
+            url = urljoin(config.BACKEND_URL, f"sdk/pipelines/{self.id}")
+            headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"}
+            logging.info(f"Start service for PUT Update Pipeline - {url} - {headers} - {json.dumps(payload)}")
+            r = _request_with_retry("put", url, headers=headers, json=payload)
+            response = r.json()
+            logging.info(f"Pipeline {response['id']} Updated.")
+        except Exception as e:
+            raise Exception(e)
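A short sketch pairing `PipelineFactory.create` with the new `Pipeline.update`, mirroring the functional test below; names and paths are placeholders:

```python
from aixplain.factories.pipeline_factory import PipelineFactory

# Create a draft pipeline, then overwrite its architecture in place.
pipeline = PipelineFactory.create(name="Draft Pipeline", pipeline={"nodes": [], "links": []})

# Update from a JSON file on disk (path is illustrative)...
pipeline.update(pipeline="tests/functional/pipelines/data/pipeline.json")

# ...or from an in-memory dictionary with the same structure.
pipeline.update(pipeline={"nodes": [], "links": []})
```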
diff --git a/docs/user/user_doc.md b/docs/user/user_doc.md
index 00034240..5b19c273 100644
--- a/docs/user/user_doc.md
+++ b/docs/user/user_doc.md
@@ -352,9 +352,12 @@ Once a `FineTune` is created (refer to the [section above](#creating-a-finetune)
 ```python
 finetune_model = finetune.start()
 ```
-We receive a model that we can check the fine-tuning status:
+We receive a model on which we can check the fine-tuning information (status, epoch, training and validation losses):
 ```python
-status = finetune_model.check_finetune_status()
+finetune_model_info = finetune_model.check_finetune_status()
+epoch = finetune_model_info.epoch
+training_loss = finetune_model_info.training_loss
+validation_loss = finetune_model_info.validation_loss
 ```
 Status can be one of the following: `onboarding`, `onboarded`, `hidden`, `training`, `deleted`, `enabling`, `disabled`, `failed`, `deleting`.
diff --git a/pyproject.toml b/pyproject.toml
index 0691c76c..9ad67878 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,7 +10,7 @@ namespaces = true
 
 [project]
 name = "aiXplain"
-version = "0.2.4"
+version = "0.2.5rc"
 description = "aiXplain SDK adds AI functions to software."
 readme = "README.md"
 requires-python = ">=3.5, <4"
diff --git a/tests/functional/finetune/finetune_functional_test.py b/tests/functional/finetune/finetune_functional_test.py
index 0231d7cb..94693f05 100644
--- a/tests/functional/finetune/finetune_functional_test.py
+++ b/tests/functional/finetune/finetune_functional_test.py
@@ -76,13 +76,13 @@ def test_end2end(run_input_map):
     assert "inferenceCost" in cost_map
     finetune_model = finetune.start()
     start, end = time.time(), time.time()
-    status = finetune_model.check_finetune_status()
+    status = finetune_model.check_finetune_status().model_status.value
     while status != "onboarded" and (end - start) < TIMEOUT:
-        status = finetune_model.check_finetune_status()
+        status = finetune_model.check_finetune_status().model_status.value
         assert status != "failed"
         time.sleep(5)
         end = time.time()
-    assert finetune_model.check_finetune_status() == "onboarded"
+    assert finetune_model.check_finetune_status().model_status.value == "onboarded"
     result = finetune_model.run(run_input_map["inference_data"])
     print(f"Result: {result}")
     assert result is not None
@@ -128,4 +128,4 @@ def test_prompt_validator(validate_prompt_input_map):
         finetune = FinetuneFactory.create(
             str(uuid.uuid4()), dataset_list, model, prompt_template=validate_prompt_input_map["prompt_template"]
         )
-    assert exc_info.type is AssertionError
+    assert exc_info.type is AssertionError
\ No newline at end of file
diff --git a/tests/functional/model/hf_onboarding_test.py b/tests/functional/model/hf_onboarding_test.py
index b70b0580..47a38361 100644
--- a/tests/functional/model/hf_onboarding_test.py
+++ b/tests/functional/model/hf_onboarding_test.py
@@ -1,11 +1,14 @@
 __author__ = "michaellam"
 
-import time
+import pytest
+import time
 from aixplain.factories.model_factory import ModelFactory
 from tests.test_utils import delete_asset
 from aixplain.utils import config
 
+
+@pytest.mark.skip(reason="Model Deployment is deactivated for improvements.")
 def test_deploy_model():
     # Start the deployment
     model_name = "Test Model"
@@ -26,6 +29,8 @@ def test_deploy_model():
     # Clean up
     delete_asset(model_id, config.TEAM_API_KEY)
 
+
+@pytest.mark.skip(reason="Model Deployment is deactivated for improvements.")
 def test_nonexistent_model():
     # Start the deployment
     model_name = "Test Model"
@@ -34,6 +39,8 @@ def test_nonexistent_model():
     assert response["statusCode"] == 400
     assert response["message"] == "err.unable_to_onboard_model"
 
+
+@pytest.mark.skip(reason="Model Deployment is deactivated for improvements.")
 def test_size_limit():
     # Start the deployment
     model_name = "Test Model"
@@ -42,10 +49,12 @@ def test_size_limit():
     assert response["statusCode"] == 400
     assert response["message"] == "err.unable_to_onboard_model"
 
+
+@pytest.mark.skip(reason="Model Deployment is deactivated for improvements.")
 def test_gated_model():
     # Start the deployment
     model_name = "Test Model"
     repo_id = "meta-llama/Llama-2-7b-hf"
     response = ModelFactory.deploy_huggingface_model(model_name, repo_id, "mock_key")
     assert response["statusCode"] == 400
-    assert response["message"] == "err.unable_to_onboard_model"
\ No newline at end of file
+    assert response["message"] == "err.unable_to_onboard_model"
diff --git a/tests/functional/pipelines/create_test.py b/tests/functional/pipelines/create_test.py
new file mode 100644
index 00000000..f2c1a9c9
--- /dev/null
+++ b/tests/functional/pipelines/create_test.py
@@ -0,0 +1,64 @@
+__author__ = "thiagocastroferreira"
+
+"""
+Copyright 2022 The aiXplain SDK authors
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+""" + +import json +import pytest +from aixplain.factories import PipelineFactory +from aixplain.modules import Pipeline +from uuid import uuid4 + + +def test_create_pipeline_from_json(): + pipeline_json = "tests/functional/pipelines/data/pipeline.json" + pipeline_name = str(uuid4()) + pipeline = PipelineFactory.create(name=pipeline_name, pipeline=pipeline_json) + + assert isinstance(pipeline, Pipeline) + assert pipeline.id != "" + + +def test_create_pipeline_from_string(): + pipeline_json = "tests/functional/pipelines/data/pipeline.json" + with open(pipeline_json) as f: + pipeline_dict = json.load(f) + + pipeline_name = str(uuid4()) + pipeline = PipelineFactory.create(name=pipeline_name, pipeline=pipeline_dict) + + assert isinstance(pipeline, Pipeline) + assert pipeline.id != "" + + +def test_update_pipeline(): + pipeline_json = "tests/functional/pipelines/data/pipeline.json" + with open(pipeline_json) as f: + pipeline_dict = json.load(f) + + pipeline_name = str(uuid4()) + pipeline = PipelineFactory.create(name=pipeline_name, pipeline=pipeline_dict) + + pipeline.update(pipeline=pipeline_json) + assert isinstance(pipeline, Pipeline) + assert pipeline.id != "" + + +def test_create_pipeline_wrong_path(): + pipeline_name = str(uuid4()) + + with pytest.raises(Exception): + pipeline = PipelineFactory.create(name=pipeline_name, pipeline="/") diff --git a/tests/functional/pipelines/data/pipeline.json b/tests/functional/pipelines/data/pipeline.json new file mode 100644 index 00000000..f48d6d4d --- /dev/null +++ b/tests/functional/pipelines/data/pipeline.json @@ -0,0 +1,100 @@ +{ + "links": [ + { + "from": 0, + "to": 1, + "paramMapping": [ + { + "from": "input", + "to": "text" + } + ] + }, + { + "from": 1, + "to": 2, + "paramMapping": [ + { + "from": "data", + "to": "text" + } + ] + }, + { + "from": 2, + "to": 3, + "paramMapping": [ + { + "from": "data", + "to": "output" + } + ] + } + ], + "nodes": [ + { + "number": 0, + "type": "INPUT" + }, + { + "number": 1, + "type": "ASSET", + "function": "sentiment-analysis", + "inputValues": [ + { + "code": "language", + "value": "en" + }, + { + "code": "text", + "dataType": "text" + } + ], + "assetId": "6172874f720b09325cbcdc33", + "assetType": "MODEL", + "autoSelectOptions": [], + "functionType": "AI", + "status": "Exists", + "outputValues": [ + { + "code": "data", + "dataType": "label" + } + ] + }, + { + "number": 2, + "type": "ASSET", + "function": "translation", + "inputValues": [ + { + "code": "sourcelanguage", + "value": "en" + }, + { + "code": "targetlanguage", + "value": "es" + }, + { + "code": "text", + "dataType": "text" + } + ], + "assetId": "61b097551efecf30109d3316", + "assetType": "MODEL", + "autoSelectOptions": [], + "functionType": "AI", + "status": "Exists", + "outputValues": [ + { + "code": "data", + "dataType": "text" + } + ] + }, + { + "number": 3, + "type": "OUTPUT" + } + ] +} \ No newline at end of file diff --git a/tests/functional/pipelines/run_test.py b/tests/functional/pipelines/run_test.py index 99e66a10..e7af6c4e 100644 --- a/tests/functional/pipelines/run_test.py +++ b/tests/functional/pipelines/run_test.py @@ -107,3 +107,61 @@ def test_run_multipipe_with_datasets(batchmode: bool): **{"batchmode": batchmode} ) assert response["status"] == "SUCCESS" + + +@pytest.mark.parametrize("batchmode", [True, False]) +def test_run_segment_reconstruct(batchmode: bool): + pipeline = PipelineFactory.list(query="Segmentation/Reconstruction Functional Test - DO NOT DELETE")["results"][0] + response = 
pipeline.run("https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", **{"batchmode": batchmode}) + + assert response["status"] == "SUCCESS" + output = response["data"][0] + assert output["label"] == "Output 1" + + +@pytest.mark.parametrize("batchmode", [True, False]) +def test_run_metric(batchmode: bool): + pipeline = PipelineFactory.list(query="ASR Metric Functional Test - DO NOT DELETE")["results"][0] + response = pipeline.run({ + "AudioInput": "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", + "ReferenceInput": "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.txt" + }, **{"batchmode": batchmode}) + + assert response["status"] == "SUCCESS" + assert len(response["data"]) == 2 + assert response["data"][0]["label"] in ["TranscriptOutput", "ScoreOutput"] + assert response["data"][1]["label"] in ["TranscriptOutput", "ScoreOutput"] + + +@pytest.mark.parametrize( + "batchmode,input_data,output_data", + [ + (True, "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", "AudioOutput"), + (False, "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", "AudioOutput"), + (True, "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.txt", "TextOutput"), + (False, "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.txt", "TextOutput") + ] +) +def test_run_router(batchmode: bool, input_data: str, output_data: str): + pipeline = PipelineFactory.list(query="Router Test - DO NOT DELETE")["results"][0] + response = pipeline.run(input_data, **{"batchmode": batchmode}) + + assert response["status"] == "SUCCESS" + assert response["data"][0]["label"] == output_data + + +@pytest.mark.parametrize( + "batchmode,input_data,output_data", + [ + (True, "I love it.", "PositiveOutput"), + (False, "I love it.", "PositiveOutput"), + (True, "I hate it.", "NegativeOutput"), + (False, "I hate it.", "NegativeOutput") + ] +) +def test_run_decision(batchmode: bool, input_data: str, output_data: str): + pipeline = PipelineFactory.list(query="Decision Test - DO NOT DELETE")["results"][0] + response = pipeline.run(input_data, **{"batchmode": batchmode}) + + assert response["status"] == "SUCCESS" + assert response["data"][0]["label"] == output_data \ No newline at end of file diff --git a/tests/image_upload_e2e_test.py b/tests/image_upload_e2e_test.py index 5e46c325..0e2ccbc5 100644 --- a/tests/image_upload_e2e_test.py +++ b/tests/image_upload_e2e_test.py @@ -6,8 +6,10 @@ from tests.test_utils import delete_asset, delete_service_account from aixplain.utils import config import docker -import os +import pytest + +@pytest.mark.skip(reason="Model Upload is deactivated for improvements.") def test_create_and_upload_model(): # List the host machines host_response = ModelFactory.list_host_machines() @@ -44,7 +46,7 @@ def test_create_and_upload_model(): # Log into the image repository. 
     login_response = ModelFactory.asset_repo_login()
-    
+
     assert login_response["username"] == "AWS"
     assert login_response["registry"] == "535945872701.dkr.ecr.us-east-1.amazonaws.com"
     assert "password" in login_response.keys()
@@ -55,12 +57,12 @@ def test_create_and_upload_model():
 
     # Push an image to ECR
     # os.system("aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 535945872701.dkr.ecr.us-east-1.amazonaws.com")
-    low_level_client = docker.APIClient(base_url='unix://var/run/docker.sock')
+    low_level_client = docker.APIClient(base_url="unix://var/run/docker.sock")
     # low_level_client.pull("535945872701.dkr.ecr.us-east-1.amazonaws.com/bash")
     # low_level_client.tag("535945872701.dkr.ecr.us-east-1.amazonaws.com/bash", f"{registry}/{repo_name}")
     low_level_client.pull("bash")
     low_level_client.tag("bash", f"{registry}/{repo_name}")
-    low_level_client.push(f"{registry}/{repo_name}", auth_config={"username":username, "password":password})
+    low_level_client.push(f"{registry}/{repo_name}", auth_config={"username": username, "password": password})
 
     # Send an email to finalize onboarding process
     ModelFactory.onboard_model(model_id, "latest", "fake_hash")
diff --git a/tests/image_upload_functional_test.py b/tests/image_upload_functional_test.py
index 0d6aa219..b9dd3ebf 100644
--- a/tests/image_upload_functional_test.py
+++ b/tests/image_upload_functional_test.py
@@ -6,6 +6,8 @@
 from aixplain.factories.model_factory import ModelFactory
 import pytest
 
+
+@pytest.mark.skip(reason="Model Upload is deactivated for improvements.")
 def test_login():
     response = ModelFactory.asset_repo_login()
     assert response["username"] == "AWS"
@@ -15,6 +17,8 @@ def test_login():
     # Test cleanup
     delete_service_account(config.TEAM_API_KEY)
 
+
+@pytest.mark.skip(reason="Model Upload is deactivated for improvements.")
 def test_create_asset_repo():
     with open(Path("tests/test_requests/create_asset_request.json")) as f:
         mock_register_payload = json.load(f)
@@ -33,6 +37,8 @@ def test_create_asset_repo():
     # Test cleanup
     delete_asset(response["id"], config.TEAM_API_KEY)
 
+
+@pytest.mark.skip(reason="Model Upload is deactivated for improvements.")
 def test_list_host_machines():
     response = ModelFactory.list_host_machines()
     for hosting_machine_dict in response:
@@ -42,6 +48,8 @@ def test_list_host_machines():
         assert "memory" in hosting_machine_dict.keys()
         assert "hourlyCost" in hosting_machine_dict.keys()
 
+
+@pytest.mark.skip(reason="Model Upload is deactivated for improvements.")
 def test_get_functions():
     # Verbose
     response = ModelFactory.list_functions(True)
@@ -53,7 +61,7 @@ def test_get_functions():
         assert "name" in item.keys()
 
     # Non-verbose
-    response = ModelFactory.list_functions() # Not verbose by default
+    response = ModelFactory.list_functions()  # Not verbose by default
     items = response["items"]
     for item in items:
         assert "output" not in item.keys()
@@ -61,6 +69,7 @@ def test_get_functions():
         assert "id" not in item.keys()
         assert "name" in item.keys()
 
+
 @pytest.mark.skip(reason="Not included in first release")
 def list_image_repo_tags():
     response = ModelFactory.list_image_repo_tags()
diff --git a/tests/image_upload_test.py b/tests/image_upload_test.py
index bb120533..fb919171 100644
--- a/tests/image_upload_test.py
+++ b/tests/image_upload_test.py
@@ -13,8 +13,9 @@
 API_FIXED_HEADER = {"x-api-key": f"{config.TEAM_API_KEY}", "Content-Type": "application/json"}
 
 
+@pytest.mark.skip(reason="Model Upload is deactivated for improvements.")
 def test_login():
f"sdk/ecr/login") + url = urljoin(config.BACKEND_URL, f"sdk/ecr/login") with requests_mock.Mocker() as mock: with open(Path("tests/mock_responses/login_response.json")) as f: mock_json = json.load(f) @@ -22,8 +23,10 @@ def test_login(): creds = ModelFactory.asset_repo_login(config.TEAM_API_KEY) assert creds == mock_json + +@pytest.mark.skip(reason="Model Upload is deactivated for improvements.") def test_create_asset_repo(): - url_register = urljoin(config.BACKEND_URL, f"sdk/models/register") + url_register = urljoin(config.BACKEND_URL, f"sdk/models/register") url_function = urljoin(config.BACKEND_URL, f"sdk/functions") with requests_mock.Mocker() as mock: with open(Path("tests/mock_responses/create_asset_repo_response.json")) as f: @@ -32,12 +35,15 @@ def test_create_asset_repo(): with open(Path("tests/mock_responses/list_functions_response.json")) as f: mock_json_functions = json.load(f) mock.get(url_function, headers=AUTH_FIXED_HEADER, json=mock_json_functions) - model_id = ModelFactory.create_asset_repo("mock_name", "mock_machines", "mock_version", - "mock_description", "Speech Recognition", "en", config.TEAM_API_KEY) + model_id = ModelFactory.create_asset_repo( + "mock_name", "mock_machines", "mock_version", "mock_description", "Speech Recognition", "en", config.TEAM_API_KEY + ) assert model_id == mock_json_register + +@pytest.mark.skip(reason="Model Upload is deactivated for improvements.") def test_list_host_machines(): - url = urljoin(config.BACKEND_URL, f"sdk/hosting-machines") + url = urljoin(config.BACKEND_URL, f"sdk/hosting-machines") with requests_mock.Mocker() as mock: with open(Path("tests/mock_responses/list_host_machines_response.json")) as f: mock_json = json.load(f) @@ -49,8 +55,10 @@ def test_list_host_machines(): for key in machine_dict.keys(): assert machine_dict[key] == mock_json_dict[key] + +@pytest.mark.skip(reason="Model Upload is deactivated for improvements.") def test_get_functions(): - url = urljoin(config.BACKEND_URL, f"sdk/functions") + url = urljoin(config.BACKEND_URL, f"sdk/functions") with requests_mock.Mocker() as mock: with open(Path("tests/mock_responses/list_functions_response.json")) as f: mock_json = json.load(f) @@ -58,10 +66,11 @@ def test_get_functions(): functions = ModelFactory.list_functions(config.TEAM_API_KEY) assert functions == mock_json + @pytest.mark.skip(reason="Not currently supported.") def test_list_image_repo_tags(): model_id = "mock_id" - url = urljoin(config.BACKEND_URL, f"sdk/models/{model_id}/images") + url = urljoin(config.BACKEND_URL, f"sdk/models/{model_id}/images") with requests_mock.Mocker() as mock: with open(Path("tests/mock_responses/list_image_repo_tags_response.json")) as f: mock_json = json.load(f) diff --git a/tests/unit/finetune_test.py b/tests/unit/finetune_test.py index d95089ea..13287c32 100644 --- a/tests/unit/finetune_test.py +++ b/tests/unit/finetune_test.py @@ -29,6 +29,7 @@ from aixplain.modules import Model, Finetune from aixplain.modules.finetune import Hyperparameters from aixplain.enums import Function +from urllib.parse import urljoin import pytest @@ -37,6 +38,8 @@ COST_ESTIMATION_FILE = "tests/unit/mock_responses/cost_estimation_response.json" FINETUNE_URL = f"{config.BACKEND_URL}/sdk/finetune" FINETUNE_FILE = "tests/unit/mock_responses/finetune_response.json" +FINETUNE_STATUS_FILE = "tests/unit/mock_responses/finetune_status_response.json" +FINETUNE_STATUS_FILE_2 = "tests/unit/mock_responses/finetune_status_response_2.json" PERCENTAGE_EXCEPTION_FILE = 
"tests/unit/data/create_finetune_percentage_exception.json" MODEL_FILE = "tests/unit/mock_responses/model_response.json" MODEL_URL = f"{config.BACKEND_URL}/sdk/models" @@ -106,16 +109,27 @@ def test_start(): assert fine_tuned_model is not None assert fine_tuned_model.id == model_map["id"] - -def test_check_finetuner_status(): - model_map = read_data(MODEL_FILE) +@pytest.mark.parametrize( + "input_path,after_epoch,training_loss,validation_loss", + [ + (FINETUNE_STATUS_FILE, None, 0.4, 0.0217), + (FINETUNE_STATUS_FILE, 1, 0.2, 0.0482), + (FINETUNE_STATUS_FILE_2, None, 2.657801408034, 2.596168756485), + (FINETUNE_STATUS_FILE_2, 0, None, 2.684150457382) + ] +) +def test_check_finetuner_status(input_path, after_epoch, training_loss, validation_loss): + model_map = read_data(input_path) asset_id = "test_id" with requests_mock.Mocker() as mock: test_model = Model(asset_id, "") - url = f"{MODEL_URL}/{asset_id}" + url = urljoin(config.BACKEND_URL, f"sdk/finetune/{asset_id}/ml-logs") mock.get(url, headers=FIXED_HEADER, json=model_map) - status = test_model.check_finetune_status() - assert status == model_map["status"] + status = test_model.check_finetune_status(after_epoch=after_epoch) + assert status.status.value == model_map["finetuneStatus"] + assert status.model_status.value == model_map["modelStatus"] + assert status.training_loss == training_loss + assert status.validation_loss == validation_loss @pytest.mark.parametrize("is_finetunable", [True, False]) @@ -132,4 +146,4 @@ def test_list_finetunable_models(is_finetunable): model_list = result["results"] assert len(model_list) > 0 for model_index in range(len(model_list)): - assert model_list[model_index].id == list_map["items"][model_index]["id"] + assert model_list[model_index].id == list_map["items"][model_index]["id"] \ No newline at end of file diff --git a/tests/unit/mock_responses/finetune_status_response.json b/tests/unit/mock_responses/finetune_status_response.json new file mode 100644 index 00000000..9647b164 --- /dev/null +++ b/tests/unit/mock_responses/finetune_status_response.json @@ -0,0 +1,41 @@ +{ + "finetuneStatus": "onboarding", + "modelStatus": "onboarded", + "logs": [ + { + "epoch": 1, + "learningRate": 9.938725490196079e-05, + "trainLoss": 0.1, + "evalLoss": 0.1106, + "step": 10 + }, + { + "epoch": 2, + "learningRate": 9.877450980392157e-05, + "trainLoss": 0.2, + "evalLoss": 0.0482, + "step": 20 + }, + { + "epoch": 3, + "learningRate": 9.816176470588235e-05, + "trainLoss": 0.3, + "evalLoss": 0.0251, + "step": 30 + }, + { + "epoch": 4, + "learningRate": 9.754901960784314e-05, + "trainLoss": 0.9, + "evalLoss": 0.0228, + "step": 40 + }, + { + "epoch": 5, + "learningRate": 9.693627450980392e-05, + "trainLoss": 0.4, + "evalLoss": 0.0217, + "step": 50 + } + ] +} \ No newline at end of file diff --git a/tests/unit/mock_responses/finetune_status_response_2.json b/tests/unit/mock_responses/finetune_status_response_2.json new file mode 100644 index 00000000..ea5814a0 --- /dev/null +++ b/tests/unit/mock_responses/finetune_status_response_2.json @@ -0,0 +1,49 @@ +{ + "id": "65fb26268fe9153a6c9c29c4", + "finetuneStatus": "in_progress", + "modelStatus": "training", + "logs": [ + { + "epoch": 1, + "learningRate": null, + "trainLoss": null, + "validationLoss": null, + "step": null, + "evalLoss": 2.684150457382, + "totalFlos": null, + "evalRuntime": 12.4129, + "trainRuntime": null, + "evalStepsPerSecond": 0.322, + "trainStepsPerSecond": null, + "evalSamplesPerSecond": 16.112 + }, + { + "epoch": 2, + "learningRate": null, + "trainLoss": null, 
+ "validationLoss": null, + "step": null, + "evalLoss": 2.596168756485, + "totalFlos": null, + "evalRuntime": 11.8249, + "trainRuntime": null, + "evalStepsPerSecond": 0.338, + "trainStepsPerSecond": null, + "evalSamplesPerSecond": 16.913 + }, + { + "epoch": 2, + "learningRate": null, + "trainLoss": 2.657801408034, + "validationLoss": null, + "step": null, + "evalLoss": null, + "totalFlos": 11893948284928, + "evalRuntime": null, + "trainRuntime": 221.7946, + "evalStepsPerSecond": null, + "trainStepsPerSecond": 0.117, + "evalSamplesPerSecond": null + } + ] +} \ No newline at end of file diff --git a/tests/unit/pipeline_test.py b/tests/unit/pipeline_test.py new file mode 100644 index 00000000..68a399aa --- /dev/null +++ b/tests/unit/pipeline_test.py @@ -0,0 +1,39 @@ +__author__ = "thiagocastroferreira" + +""" +Copyright 2022 The aiXplain SDK authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +from dotenv import load_dotenv + +load_dotenv() +import requests_mock +from aixplain.utils import config +from aixplain.factories import PipelineFactory +from aixplain.modules import Pipeline +from urllib.parse import urljoin +import pytest + + +def test_create_pipeline(): + with requests_mock.Mocker() as mock: + url = urljoin(config.BACKEND_URL, "sdk/pipelines") + headers = {"x-api-key": config.TEAM_API_KEY, "Content-Type": "application/json"} + ref_response = {"id": "12345"} + mock.post(url, headers=headers, json=ref_response) + ref_pipeline = Pipeline(id="12345", name="Pipeline Test", api_key=config.TEAM_API_KEY) + hyp_pipeline = PipelineFactory.create(pipeline={}, name="Pipeline Test") + assert hyp_pipeline.id == ref_pipeline.id + assert hyp_pipeline.name == ref_pipeline.name