diff --git a/aixplain/enums/data_type.py b/aixplain/enums/data_type.py index d13542f9..fa79d070 100644 --- a/aixplain/enums/data_type.py +++ b/aixplain/enums/data_type.py @@ -35,4 +35,4 @@ class DataType(Enum): VIDEO = "video" def __str__(self): - return self._value_ \ No newline at end of file + return self._value_ diff --git a/aixplain/factories/model_factory.py b/aixplain/factories/model_factory.py index cd7de970..9ed3138f 100644 --- a/aixplain/factories/model_factory.py +++ b/aixplain/factories/model_factory.py @@ -65,7 +65,7 @@ def _create_model_from_response(cls, response: Dict) -> Model: response["name"], supplier=response["supplier"], api_key=response["api_key"], - pricing=response["pricing"], + cost=response["pricing"], function=Function(response["function"]["id"]), parameters=parameters, is_subscribed=True if "subscription" in response else False, @@ -404,9 +404,11 @@ def onboard_model(cls, model_id: Text, image_tag: Text, image_hash: Text, api_ke message = "Your onboarding request has been submitted to an aiXplain specialist for finalization. We will notify you when the process is completed." logging.info(message) return response - + @classmethod - def deploy_huggingface_model(cls, name: Text, hf_repo_id: Text, hf_token: Optional[Text] = "", api_key: Optional[Text] = None) -> Dict: + def deploy_huggingface_model( + cls, name: Text, hf_repo_id: Text, hf_token: Optional[Text] = "", api_key: Optional[Text] = None + ) -> Dict: """Onboards and deploys a Hugging Face large language model. Args: @@ -433,20 +435,16 @@ def deploy_huggingface_model(cls, name: Text, hf_repo_id: Text, hf_token: Option "sourceLanguage": "en", }, "source": "huggingface", - "onboardingParams": { - "hf_model_name": model_name, - "hf_supplier": supplier, - "hf_token": hf_token - } + "onboardingParams": {"hf_model_name": model_name, "hf_supplier": supplier, "hf_token": hf_token}, } response = _request_with_retry("post", deploy_url, headers=headers, json=body) logging.debug(response.text) response_dicts = json.loads(response.text) return response_dicts - + @classmethod def get_huggingface_model_status(cls, model_id: Text, api_key: Optional[Text] = None): - """Gets the on-boarding status of a Hugging Face model with ID MODEL_ID. + """Gets the on-boarding status of a Hugging Face model with ID MODEL_ID. 
+        """Gets the on-boarding status of a Hugging Face model with ID MODEL_ID.
 
         Args:
             model_id (Text): The model's ID as returned by DEPLOY_HUGGINGFACE_MODEL
@@ -466,6 +464,6 @@ def get_huggingface_model_status(cls, model_id: Text, api_key: Optional[Text] =
             "status": response_dicts["status"],
             "name": response_dicts["name"],
             "id": response_dicts["id"],
-            "pricing": response_dicts["pricing"]
+            "pricing": response_dicts["pricing"],
         }
-        return ret_dict
\ No newline at end of file
+        return ret_dict
diff --git a/aixplain/factories/pipeline_factory.py b/aixplain/factories/pipeline_factory.py
index 078bcae6..404a5556 100644
--- a/aixplain/factories/pipeline_factory.py
+++ b/aixplain/factories/pipeline_factory.py
@@ -22,6 +22,7 @@
 """
 import json
 import logging
+import os
 from typing import Dict, List, Optional, Text, Union
 from aixplain.enums.data_type import DataType
 from aixplain.enums.function import Function
@@ -207,7 +208,7 @@ def list(
             output_data_types = [output_data_types]
         payload["inputDataTypes"] = [data_type.value for data_type in output_data_types]
 
-    logging.info(f"Start service for POST List Dataset - {url} - {headers} - {json.dumps(payload)}")
+    logging.info(f"Start service for POST List Pipeline - {url} - {headers} - {json.dumps(payload)}")
     r = _request_with_retry("post", url, headers=headers, json=payload)
     resp = r.json()
 
@@ -220,3 +221,40 @@ def list(
         for pipeline in results:
             pipelines.append(cls.__from_response(pipeline))
         return {"results": pipelines, "page_total": page_total, "page_number": page_number, "total": total}
+
+    @classmethod
+    def create(cls, name: Text, pipeline: Union[Text, Dict], status: Text = "draft") -> Pipeline:
+        """Pipeline Creation
+
+        Args:
+            name (Text): Pipeline Name
+            pipeline (Union[Text, Dict]): Pipeline as a Python dictionary or in a JSON file
+            status (Text, optional): Status of the pipeline. Currently only draft pipelines can be saved. Defaults to "draft".
+
+        Raises:
+            Exception: Currently only the creation of draft pipelines is supported
+
+        Returns:
+            Pipeline: instance of the new pipeline
+        """
+        try:
+            assert status == "draft", "Pipeline Creation Error: Currently only the creation of draft pipelines is supported."
+            if isinstance(pipeline, str):
+                _, ext = os.path.splitext(pipeline)
+                assert (
+                    os.path.exists(pipeline) and ext == ".json"
+                ), "Pipeline Creation Error: Make sure the pipeline to be saved is in a JSON file."
+                with open(pipeline) as f:
+                    pipeline = json.load(f)
+
+            # prepare payload
+            payload = {"name": name, "status": "draft", "architecture": pipeline}
+            url = urljoin(cls.backend_url, "sdk/pipelines")
+            headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"}
+            logging.info(f"Start service for POST Create Pipeline - {url} - {headers} - {json.dumps(payload)}")
+            r = _request_with_retry("post", url, headers=headers, json=payload)
+            response = r.json()
+
+            return Pipeline(response["id"], name, config.TEAM_API_KEY)
+        except Exception as e:
+            raise Exception(e)
diff --git a/aixplain/modules/asset.py b/aixplain/modules/asset.py
index 34fea4e4..52b79912 100644
--- a/aixplain/modules/asset.py
+++ b/aixplain/modules/asset.py
@@ -36,7 +36,7 @@ def __init__(
         version: Text = "1.0",
         license: Optional[License] = None,
         privacy: Privacy = Privacy.PRIVATE,
-        cost: float = 0,
+        cost: Optional[Union[Dict, float]] = None,
     ) -> None:
         """Create an Asset with the necessary information
 
         Args:
             id (Text): ID of the Asset
             name (Text): Name of the Asset
             description (Text): Description of the Asset
             supplier (Union[Dict, Text, Supplier, int], optional): supplier of the asset. Defaults to "aiXplain".
            version (Optional[Text], optional): asset version. Defaults to "1.0".
+            cost (Optional[Union[Dict, float]], optional): asset price. Defaults to None.
         """
         self.id = id
         self.name = name
diff --git a/aixplain/modules/metadata.py b/aixplain/modules/metadata.py
index f7fac5f5..07007ebe 100644
--- a/aixplain/modules/metadata.py
+++ b/aixplain/modules/metadata.py
@@ -43,6 +43,7 @@ def __init__(
         file_extension: Optional[FileType] = None,
         languages: List[Language] = [],
         dsubtype: DataSubtype = DataSubtype.OTHER,
+        id: Optional[Text] = None,
         **kwargs
     ) -> None:
         """MetaData Class
@@ -62,6 +63,7 @@ def __init__(
             file_extension (Optional[FileType], optional): File extension (e.g. CSV, TXT, etc.). Defaults to None.
             languages (List[Language], optional): List of languages which the data consists of. Defaults to [].
             dsubtype (DataSubtype, optional): Data subtype (e.g., age, topic, race, split, etc.), used in datasets metadata. Defaults to Other.
+            id (Optional[Text], optional): Data ID. Defaults to None.
         """
         self.name = name
         if isinstance(dtype, str):
@@ -91,4 +93,5 @@ def __init__(
                 language = Language(language)
                 self.languages.append(language)
         self.dsubtype = dsubtype
+        self.id = id
         self.kwargs = kwargs
diff --git a/aixplain/modules/metric.py b/aixplain/modules/metric.py
index 04a0bdd7..d591772b 100644
--- a/aixplain/modules/metric.py
+++ b/aixplain/modules/metric.py
@@ -61,12 +61,10 @@ def __init__(
             supplier (Text): author of the Metric
             is_reference_required (bool): does the metric use reference
             is_source_required (bool): does the metric use source
-            cost (float): cost of the metric
+            cost (float): price of the metric
             normalization_options(list, [])
             **additional_info: Any additional Metric info to be saved
         """
-
-
         super().__init__(id, name, description="", supplier=supplier, version="1.0", cost=cost)
         self.is_source_required = is_source_required
         self.is_reference_required = is_reference_required
@@ -76,7 +74,7 @@ def __repr__(self) -> str:
         return f"<Metric {self.name}>"
-
+
     def add_normalization_options(self, normalization_options: List[str]):
         """Add a given set of normalization options to be used while benchmarking
 
         Args:
@@ -85,7 +83,12 @@
         """
         self.normalization_options.append(normalization_options)
 
-    def run(self, hypothesis: Optional[Union[str, List[str]]]=None, source: Optional[Union[str, List[str]]]=None, reference: Optional[Union[str, List[str]]]=None):
+    def run(
+        self,
+        hypothesis: Optional[Union[str, List[str]]] = None,
+        source: Optional[Union[str, List[str]]] = None,
+        reference: Optional[Union[str, List[str]]] = None,
+    ):
         """Run the metric to calculate the scores.
 
         Args:
@@ -94,6 +97,7 @@ def run(self, hypothesis: Optional[Union[str, List[str]]]=None, source: Optional
             reference (Optional[Union[str, List[str]]], optional): Can give a single reference or a list of references for metric calculation. Defaults to None.
         """
         from aixplain.factories.model_factory import ModelFactory
+
         model = ModelFactory.get(self.id)
         payload = {
             "function": self.function,
@@ -115,4 +119,3 @@ def run(self, hypothesis: Optional[Union[str, List[str]]]=None, source: Optional
             reference = [[ref] for ref in reference]
             payload["references"] = reference
         return model.run(payload)
-
diff --git a/aixplain/modules/model.py b/aixplain/modules/model.py
index fc3a82cd..983737c7 100644
--- a/aixplain/modules/model.py
+++ b/aixplain/modules/model.py
@@ -48,6 +48,7 @@ class Model(Asset):
     function (Text, optional): model AI function. Defaults to None.
    url (str): URL to run the model.
     backend_url (str): URL of the backend.
+    cost (Dict, optional): model price. Defaults to None.
     **additional_info: Any additional Model info to be saved
 """
 
@@ -61,6 +62,7 @@ def __init__(
         version: Optional[Text] = None,
         function: Optional[Text] = None,
         is_subscribed: bool = False,
+        cost: Optional[Dict] = None,
         **additional_info,
     ) -> None:
         """Model Init
@@ -74,9 +76,10 @@ def __init__(
             version (Text, optional): version of the model. Defaults to "1.0".
             function (Text, optional): model AI function. Defaults to None.
             is_subscribed (bool, optional): Is the user subscribed. Defaults to False.
+            cost (Dict, optional): model price. Defaults to None.
             **additional_info: Any additional Model info to be saved
         """
-        super().__init__(id, name, description, supplier, version)
+        super().__init__(id, name, description, supplier, version, cost=cost)
         self.api_key = api_key
         self.additional_info = additional_info
         self.url = config.MODELS_RUN_URL
@@ -264,6 +267,7 @@ def check_finetune_status(self, after_epoch: Optional[int] = None):
         """
         from aixplain.enums.asset_status import AssetStatus
         from aixplain.modules.finetune.status import FinetuneStatus
+
         headers = {"x-api-key": self.api_key, "Content-Type": "application/json"}
         resp = None
         try:
@@ -274,7 +278,7 @@ def check_finetune_status(self, after_epoch: Optional[int] = None):
             finetune_status = AssetStatus(resp["finetuneStatus"])
             model_status = AssetStatus(resp["modelStatus"])
             logs = sorted(resp["logs"], key=lambda x: float(x["epoch"]))
-            
+
             target_epoch = None
             if after_epoch is not None:
                 logs = [log for log in logs if float(log["epoch"]) > after_epoch]
@@ -282,7 +286,7 @@ def check_finetune_status(self, after_epoch: Optional[int] = None):
                     target_epoch = float(logs[0]["epoch"])
             elif len(logs) > 0:
                 target_epoch = float(logs[-1]["epoch"])
-            
+
             if target_epoch is not None:
                 log = None
                 for log_ in logs:
@@ -294,7 +298,7 @@ def check_finetune_status(self, after_epoch: Optional[int] = None):
                             log["trainLoss"] = log_["trainLoss"]
                         if log_["evalLoss"] is not None:
                             log["evalLoss"] = log_["evalLoss"]
-            
+
             status = FinetuneStatus(
                 status=finetune_status,
                 model_status=model_status,
diff --git a/aixplain/modules/pipeline.py b/aixplain/modules/pipeline.py
index 011f3114..3de49756 100644
--- a/aixplain/modules/pipeline.py
+++ b/aixplain/modules/pipeline.py
@@ -23,11 +23,13 @@
 
 import time
 import json
+import os
 import logging
 from aixplain.modules.asset import Asset
 from aixplain.utils import config
 from aixplain.utils.file_utils import _request_with_retry
 from typing import Dict, Optional, Text, Union
+from urllib.parse import urljoin
 
 
 class Pipeline(Asset):
@@ -306,3 +308,32 @@ def run_async(
         if resp is not None:
             response["error"] = resp
         return response
+
+    def update(self, pipeline: Union[Text, Dict]):
+        """Update Pipeline
+
+        Args:
+            pipeline (Union[Text, Dict]): Pipeline as a Python dictionary or in a JSON file
+
+        Raises:
+            Exception: Make sure the pipeline to be saved is in a JSON file.
+        """
+        try:
+            if isinstance(pipeline, str):
+                _, ext = os.path.splitext(pipeline)
+                assert (
+                    os.path.exists(pipeline) and ext == ".json"
+                ), "Pipeline Update Error: Make sure the pipeline to be saved is in a JSON file."
+ with open(pipeline) as f: + pipeline = json.load(f) + + # prepare payload + payload = {"name": self.name, "status": "draft", "architecture": pipeline} + url = urljoin(config.BACKEND_URL, f"sdk/pipelines/{self.id}") + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + logging.info(f"Start service for PUT Update Pipeline - {url} - {headers} - {json.dumps(payload)}") + r = _request_with_retry("put", url, headers=headers, json=payload) + response = r.json() + logging.info(f"Pipeline {response['id']} Updated.") + except Exception as e: + raise Exception(e) diff --git a/aixplain/processes/data_onboarding/onboard_functions.py b/aixplain/processes/data_onboarding/onboard_functions.py index 091458fd..35a64e12 100644 --- a/aixplain/processes/data_onboarding/onboard_functions.py +++ b/aixplain/processes/data_onboarding/onboard_functions.py @@ -97,11 +97,11 @@ def process_data_files( -1, 0, ) - if metadata.dtype in [DataType.AUDIO, DataType.IMAGE] or metadata.dsubtype == DataSubtype.INTERVAL: + if metadata.dtype in [DataType.AUDIO, DataType.IMAGE, DataType.LABEL] or metadata.dsubtype == DataSubtype.INTERVAL: files, data_column_idx, start_column_idx, end_column_idx, nrows = process_media_files.run( metadata=metadata, paths=paths, folder=folder ) - elif metadata.dtype in [DataType.TEXT, DataType.LABEL]: + elif metadata.dtype in [DataType.TEXT]: files, data_column_idx, nrows = process_text_files.run(metadata=metadata, paths=paths, folder=folder) return files, data_column_idx, start_column_idx, end_column_idx, nrows diff --git a/aixplain/processes/data_onboarding/process_media_files.py b/aixplain/processes/data_onboarding/process_media_files.py index 3f95b1e3..62fd369a 100644 --- a/aixplain/processes/data_onboarding/process_media_files.py +++ b/aixplain/processes/data_onboarding/process_media_files.py @@ -5,6 +5,7 @@ import pandas as pd import shutil import tarfile +import validators from aixplain.enums.data_subtype import DataSubtype from aixplain.enums.data_type import DataType @@ -16,6 +17,7 @@ from pathlib import Path from tqdm import tqdm from typing import List, Tuple +from urllib.parse import urlparse AUDIO_MAX_SIZE = 50000000 IMAGE_TEXT_MAX_SIZE = 25000000 @@ -45,6 +47,11 @@ def run(metadata: MetaData, paths: List, folder: Path, batch_size: int = 100) -> Returns: Tuple[List[File], int, int, int]: list of s3 links; data, start and end columns index, and number of rows """ + if metadata.dtype != DataType.LABEL: + assert ( + metadata.storage_type != StorageType.TEXT + ), f'Data Asset Onboarding Error: Column "{metadata.name}" of type "{metadata.dtype}" can not be stored in text.' + # if files are stored locally, create a folder to store it media_folder = Path(".") if metadata.storage_type == StorageType.FILE: @@ -95,6 +102,10 @@ def run(metadata: MetaData, paths: List, folder: Path, batch_size: int = 100) -> assert ( os.path.getsize(media_path) <= AUDIO_MAX_SIZE ), f'Data Asset Onboarding Error: Local audio file "{media_path}" exceeds the size limit of 50 MB.' + elif metadata.dtype == DataType.LABEL: + assert ( + os.path.getsize(media_path) <= IMAGE_TEXT_MAX_SIZE + ), f'Data Asset Onboarding Error: Local label file "{media_path}" exceeds the size limit of 25 MB.' 
        else:
             assert (
                 os.path.getsize(media_path) <= IMAGE_TEXT_MAX_SIZE
@@ -105,6 +116,13 @@ def run(metadata: MetaData, paths: List, folder: Path, batch_size: int = 100) ->
                     shutil.copy2(media_path, new_path)
                     batch.append(fname)
                 else:
+                    if metadata.storage_type == StorageType.TEXT and (
+                        str(media_path).startswith("s3://")
+                        or str(media_path).startswith("http://")
+                        or str(media_path).startswith("https://")
+                        or validators.url(media_path)
+                    ):
+                        media_path = "DONOTDOWNLOAD" + str(media_path)
                     batch.append(media_path)
 
         # crop intervals can not be used with interval data types
diff --git a/aixplain/processes/data_onboarding/process_text_files.py b/aixplain/processes/data_onboarding/process_text_files.py
index 48db3f4e..1ba7f47e 100644
--- a/aixplain/processes/data_onboarding/process_text_files.py
+++ b/aixplain/processes/data_onboarding/process_text_files.py
@@ -26,7 +26,7 @@ def process_text(content: str, storage_type: StorageType) -> Text:
         Text: textual content
     """
     if storage_type == StorageType.FILE:
-        # Check the size of file and assert a limit of 50 MB
+        # Check the size of file and assert a limit of 25 MB
         assert (
             os.path.getsize(content) <= 25000000
         ), f'Data Asset Onboarding Error: Local text file "{content}" exceeds the size limit of 25 MB.'
diff --git a/docs/assets/architecture.png b/docs/assets/architecture.png
new file mode 100644
index 00000000..91131c72
Binary files /dev/null and b/docs/assets/architecture.png differ
diff --git a/docs/development/developer_guide.md b/docs/development/developer_guide.md
index cfcedbe6..aaabc8f5 100644
--- a/docs/development/developer_guide.md
+++ b/docs/development/developer_guide.md
@@ -40,8 +40,13 @@ set LOG_LEVEL=DEBUG
 %env LOG_LEVEL=DEBUG
 ```
 
+## Architecture
 
-## Data Asset Onboard
+### Diagram
+
+![Data Asset Onboard Process](../assets/architecture.png)
+
+### Data Asset Onboard
 
 The image below depicts the onboard process of a data asset (e.g. corpora and datasets):
diff --git a/docs/samples/label_dataset_onboarding/corpus/images/1.jpg b/docs/samples/label_dataset_onboarding/corpus/images/1.jpg
new file mode 100644
index 00000000..ae3d592c
Binary files /dev/null and b/docs/samples/label_dataset_onboarding/corpus/images/1.jpg differ
diff --git a/docs/samples/label_dataset_onboarding/corpus/images/2.png b/docs/samples/label_dataset_onboarding/corpus/images/2.png
new file mode 100644
index 00000000..ba23ab11
Binary files /dev/null and b/docs/samples/label_dataset_onboarding/corpus/images/2.png differ
diff --git a/docs/samples/label_dataset_onboarding/corpus/index.csv b/docs/samples/label_dataset_onboarding/corpus/index.csv
new file mode 100644
index 00000000..69ba347a
--- /dev/null
+++ b/docs/samples/label_dataset_onboarding/corpus/index.csv
@@ -0,0 +1,3 @@
+,images,labels
+0,corpus/images/1.jpg,corpus/labels/1.json
+1,corpus/images/2.png,corpus/labels/2.json
diff --git a/docs/samples/label_dataset_onboarding/corpus/labels/1.json b/docs/samples/label_dataset_onboarding/corpus/labels/1.json
new file mode 100644
index 00000000..6947447f
--- /dev/null
+++ b/docs/samples/label_dataset_onboarding/corpus/labels/1.json
@@ -0,0 +1,9 @@
+{
+    "data": "arcade",
+    "boundingBox": {
+        "top": 0,
+        "bottom": 0,
+        "left": 0,
+        "right": 0
+    }
+}
\ No newline at end of file
diff --git a/docs/samples/label_dataset_onboarding/corpus/labels/2.json b/docs/samples/label_dataset_onboarding/corpus/labels/2.json
new file mode 100644
index 00000000..b990cfd3
--- /dev/null
+++ b/docs/samples/label_dataset_onboarding/corpus/labels/2.json
@@ -0,0 +1,9 @@
+{
+    "data": "building",
+    "boundingBox": {
+        "top": 0,
+        "bottom": 0,
+        "left": 0,
+        "right": 0
+    }
+}
\ No newline at end of file
diff --git a/docs/samples/label_dataset_onboarding/label_dataset_onboarding.ipynb b/docs/samples/label_dataset_onboarding/label_dataset_onboarding.ipynb
new file mode 100644
index 00000000..f499dd51
--- /dev/null
+++ b/docs/samples/label_dataset_onboarding/label_dataset_onboarding.ipynb
@@ -0,0 +1,399 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Image Label Detection Dataset Onboarding\n",
+    "\n",
+    "This notebook demonstrates how to onboard a dataset with label data into the aiXplain platform using its SDK."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Credentials\n",
+    "\n",
+    "To use the aiXplain SDK, you must be registered on our platform and have an API key. The step-by-step on how to get one is described [here](/docs/user/api_setup.md)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "os.environ[\"TEAM_API_KEY\"] = \"YOUR_TEAM_API_KEY_HERE\""
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Data\n",
+    "\n",
+    "In this example we will show how to onboard a sample dataset of images and their corresponding labels. To onboard it, the data needs to be described in a CSV file, which will be fed to the SDK. \n",
\n", + "\n", + "Label data should have be one or more elements in a JSON file according to one of the following structure:\n", + "\n", + "```json\n", + "{\n", + " \"data\": \"TEXT_AUDIO_LABEL\",\n", + " \"boundingBox\": {\n", + " \"start\": 0, // start character\n", + " \"end\": 0, // end character\n", + " }\n", + "}\n", + "\n", + "{\n", + " \"data\": \"AUDIO_LABEL\",\n", + " \"boundingBox\": {\n", + " \"start\": 0, // start second\n", + " \"end\": 0 // end second\n", + " }\n", + "}\n", + "\n", + "{\n", + " \"data\": \"IMAGE_LABEL\",\n", + " \"boundingBox\": {\n", + " \"top\": 0, // top percentage of the image\n", + " \"bottom\": 0, // bottom percentage of the image\n", + " \"left\": 0, // left percentage of the image\n", + " \"right\": 0 // right percentage of the image\n", + " }\n", + "}\n", + "\n", + "{\n", + " \"data\": \"VIDEO_LABEL\",\n", + " \"boundingBox\": {\n", + " \"start\": 0, // start second\n", + " \"end\": 0, // end second\n", + " \"top\": 0, // top percentage of the image\n", + " \"bottom\": 0, // bottom percentage of the image\n", + " \"left\": 0, // left percentage of the image\n", + " \"right\": 0 // right percentage of the image\n", + " }\n", + "}\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/1p/jbswfpbs73q5qbbh78dzj5xm0000gn/T/ipykernel_47954/611755932.py:1: DeprecationWarning: \n", + "Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),\n", + "(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)\n", + "but was not found to be installed on your system.\n", + "If this would cause problems for you,\n", + "please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466\n", + " \n", + " import pandas as pd\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0imageslabels
00corpus/images/1.jpgcorpus/labels/1.json
11corpus/images/2.pngcorpus/labels/2.json
\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 images labels\n", + "0 0 corpus/images/1.jpg corpus/labels/1.json\n", + "1 1 corpus/images/2.png corpus/labels/2.json" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "upload_file = \"corpus/index.csv\"\n", + "data = pd.read_csv(upload_file)\n", + "data" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import\n", + "\n", + "Let's now import the necessary classes to onboard the corpus." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from aixplain.enums import DataType, DataSubtype, Function, Language, License, StorageType\n", + "from aixplain.factories import DatasetFactory\n", + "from aixplain.modules import MetaData" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Metadata\n", + "\n", + "Besides the CSV file, a schema must be fed to the SDK giving some information about the input and output data to be onboarded, such as: \n", + "\n", + "1. Data Name\n", + "2. Data Type: Audio, Text, Image, Video, Label, etc.\n", + "3. Storage Type: whether the data is depicted in the CSV (Text), in a local file (File) or in a public link (URL)\n", + "4. Start Column (optional): the column which depicts the beginning of the segment in the original file\n", + "5. End Column (optional): the column which depicts the end of the segment in the original file\n", + "6. Languages (optional): the languages depicted in the data" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's instantiate the metadata for the images:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "image_meta = MetaData(\n", + " name=\"images\", \n", + " dtype=\"image\", \n", + " storage_type=\"file\", \n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now for the labels...\n", + "\n", + "(See how we can use enumerations instead of strings to specify some information)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "label_meta = MetaData(\n", + " name=\"labels\", \n", + " dtype=DataType.LABEL, \n", + " storage_type=StorageType.FILE,\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's now create the schemas for the input and output data of the dataset. Since this is a image label detection dataset, the images will be set as the input and the labels as the output data." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "input_schema = [image_meta]\n", + "output_schema = [label_meta]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally we can called the `create` method to onboard the dataset, specifying the name, description, license, path to the content files and schemas. \n", + "\n", + "See that a Dataset ID will be provided as response together with the status of the onboarding process." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " Dataset's inputs onboard progress: 0%| | 0/1 [00:00,\n", + " 'privacy': ,\n", + " 'cost': 0,\n", + " 'onboard_status': ,\n", + " 'function': ,\n", + " 'source_data': {'images': },\n", + " 'target_data': {'labels': []},\n", + " 'hypotheses': {},\n", + " 'metadata': {},\n", + " 'tags': [],\n", + " 'length': None,\n", + " 'kwargs': {}}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset = DatasetFactory.get(payload[\"asset_id\"])\n", + "dataset.__dict__" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/user/user_doc.md b/docs/user/user_doc.md index 6b52e7d0..89efa478 100644 --- a/docs/user/user_doc.md +++ b/docs/user/user_doc.md @@ -286,6 +286,9 @@ Using the aiXplain SDK, you can also onboard your dataset into the aiXplain plat - Machine translation dataset directly from s3: - [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1Asnjeq5JQ9pV6UUQ2Z20XtrjnoaFD0nf?usp=sharing) +- Image Label Detection Dataset: + - [Link](../samples/label_dataset_onboarding/label_dataset_onboarding.ipynb) + ## FineTune [FineTune](https://aixplain.com/platform/finetune) allows you to customize models by tuning them using your data and enhancing their performance. Set up and start fine-tuning with a few lines of code. Once fine-tuning is complete, the model will be deployed into your assets, ready for you to use. diff --git a/pyproject.toml b/pyproject.toml index 9ad67878..112c8f9a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ namespaces = true [project] name = "aiXplain" -version = "0.2.5rc" +version = "0.2.12" description = "aiXplain SDK adds AI functions to software." 
readme = "README.md" requires-python = ">=3.5, <4" @@ -51,7 +51,7 @@ dependencies = [ "filetype>=1.2.0", "click>=7.1.2,<8.0.0", "PyYAML>=6.0.1", - "dataclasses-json==0.6.1" + "dataclasses-json>=0.5.2" ] [project.urls] diff --git a/tests/functional/finetune/data/finetune_test_end2end.json b/tests/functional/finetune/data/finetune_test_end2end.json index ead1fd88..80768de9 100644 --- a/tests/functional/finetune/data/finetune_test_end2end.json +++ b/tests/functional/finetune/data/finetune_test_end2end.json @@ -10,17 +10,17 @@ { "model_name": "aiR", "model_id": "6499cc946eb5633de15d82a1", - "dataset_name": "Test search dataset metadata", + "dataset_name": "Test search dataset", "inference_data": "Hello!", "required_dev": false, - "search_metadata": true + "search_metadata": false }, { "model_name": "vectara", "model_id": "655e20f46eb563062a1aa301", - "dataset_name": "Test search dataset metadata", + "dataset_name": "Test search dataset", "inference_data": "Hello!", "required_dev": false, - "search_metadata": true + "search_metadata": false } ] \ No newline at end of file diff --git a/tests/functional/finetune/finetune_functional_test.py b/tests/functional/finetune/finetune_functional_test.py index 94693f05..ffa9ad5a 100644 --- a/tests/functional/finetune/finetune_functional_test.py +++ b/tests/functional/finetune/finetune_functional_test.py @@ -83,6 +83,8 @@ def test_end2end(run_input_map): time.sleep(5) end = time.time() assert finetune_model.check_finetune_status().model_status.value == "onboarded" + time.sleep(30) + print(f"Model dict: {finetune_model.__dict__}") result = finetune_model.run(run_input_map["inference_data"]) print(f"Result: {result}") assert result is not None diff --git a/tests/functional/general_assets/asset_functional_test.py b/tests/functional/general_assets/asset_functional_test.py index 6a9dceda..93a3b297 100644 --- a/tests/functional/general_assets/asset_functional_test.py +++ b/tests/functional/general_assets/asset_functional_test.py @@ -82,8 +82,8 @@ def test_model_sort(): prev_model = models[idx - 1] model = models[idx] - prev_model_price = prev_model.additional_info["pricing"]["price"] - model_price = model.additional_info["pricing"]["price"] + prev_model_price = prev_model.cost["price"] + model_price = model.cost["price"] assert prev_model_price >= model_price diff --git a/tests/functional/model/hf_onboarding_test.py b/tests/functional/model/hf_onboarding_test.py index b70b0580..47a38361 100644 --- a/tests/functional/model/hf_onboarding_test.py +++ b/tests/functional/model/hf_onboarding_test.py @@ -1,11 +1,14 @@ __author__ = "michaellam" -import time +import pytest +import time from aixplain.factories.model_factory import ModelFactory from tests.test_utils import delete_asset from aixplain.utils import config + +@pytest.mark.skip(reason="Model Deployment is deactivated for improvements.") def test_deploy_model(): # Start the deployment model_name = "Test Model" @@ -26,6 +29,8 @@ def test_deploy_model(): # Clean up delete_asset(model_id, config.TEAM_API_KEY) + +@pytest.mark.skip(reason="Model Deployment is deactivated for improvements.") def test_nonexistent_model(): # Start the deployment model_name = "Test Model" @@ -34,6 +39,8 @@ def test_nonexistent_model(): assert response["statusCode"] == 400 assert response["message"] == "err.unable_to_onboard_model" + +@pytest.mark.skip(reason="Model Deployment is deactivated for improvements.") def test_size_limit(): # Start the deployment model_name = "Test Model" @@ -42,10 +49,12 @@ def test_size_limit(): assert 
response["statusCode"] == 400 assert response["message"] == "err.unable_to_onboard_model" + +@pytest.mark.skip(reason="Model Deployment is deactivated for improvements.") def test_gated_model(): # Start the deployment model_name = "Test Model" repo_id = "meta-llama/Llama-2-7b-hf" response = ModelFactory.deploy_huggingface_model(model_name, repo_id, "mock_key") assert response["statusCode"] == 400 - assert response["message"] == "err.unable_to_onboard_model" \ No newline at end of file + assert response["message"] == "err.unable_to_onboard_model" diff --git a/tests/functional/pipelines/create_test.py b/tests/functional/pipelines/create_test.py new file mode 100644 index 00000000..f2c1a9c9 --- /dev/null +++ b/tests/functional/pipelines/create_test.py @@ -0,0 +1,64 @@ +__author__ = "thiagocastroferreira" + +""" +Copyright 2022 The aiXplain SDK authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import json +import pytest +from aixplain.factories import PipelineFactory +from aixplain.modules import Pipeline +from uuid import uuid4 + + +def test_create_pipeline_from_json(): + pipeline_json = "tests/functional/pipelines/data/pipeline.json" + pipeline_name = str(uuid4()) + pipeline = PipelineFactory.create(name=pipeline_name, pipeline=pipeline_json) + + assert isinstance(pipeline, Pipeline) + assert pipeline.id != "" + + +def test_create_pipeline_from_string(): + pipeline_json = "tests/functional/pipelines/data/pipeline.json" + with open(pipeline_json) as f: + pipeline_dict = json.load(f) + + pipeline_name = str(uuid4()) + pipeline = PipelineFactory.create(name=pipeline_name, pipeline=pipeline_dict) + + assert isinstance(pipeline, Pipeline) + assert pipeline.id != "" + + +def test_update_pipeline(): + pipeline_json = "tests/functional/pipelines/data/pipeline.json" + with open(pipeline_json) as f: + pipeline_dict = json.load(f) + + pipeline_name = str(uuid4()) + pipeline = PipelineFactory.create(name=pipeline_name, pipeline=pipeline_dict) + + pipeline.update(pipeline=pipeline_json) + assert isinstance(pipeline, Pipeline) + assert pipeline.id != "" + + +def test_create_pipeline_wrong_path(): + pipeline_name = str(uuid4()) + + with pytest.raises(Exception): + pipeline = PipelineFactory.create(name=pipeline_name, pipeline="/") diff --git a/tests/functional/pipelines/data/pipeline.json b/tests/functional/pipelines/data/pipeline.json new file mode 100644 index 00000000..f48d6d4d --- /dev/null +++ b/tests/functional/pipelines/data/pipeline.json @@ -0,0 +1,100 @@ +{ + "links": [ + { + "from": 0, + "to": 1, + "paramMapping": [ + { + "from": "input", + "to": "text" + } + ] + }, + { + "from": 1, + "to": 2, + "paramMapping": [ + { + "from": "data", + "to": "text" + } + ] + }, + { + "from": 2, + "to": 3, + "paramMapping": [ + { + "from": "data", + "to": "output" + } + ] + } + ], + "nodes": [ + { + "number": 0, + "type": "INPUT" + }, + { + "number": 1, + "type": "ASSET", + "function": "sentiment-analysis", + "inputValues": [ + { + "code": "language", + "value": "en" + }, + { + "code": "text", + "dataType": "text" + } + ], 
+ "assetId": "6172874f720b09325cbcdc33", + "assetType": "MODEL", + "autoSelectOptions": [], + "functionType": "AI", + "status": "Exists", + "outputValues": [ + { + "code": "data", + "dataType": "label" + } + ] + }, + { + "number": 2, + "type": "ASSET", + "function": "translation", + "inputValues": [ + { + "code": "sourcelanguage", + "value": "en" + }, + { + "code": "targetlanguage", + "value": "es" + }, + { + "code": "text", + "dataType": "text" + } + ], + "assetId": "61b097551efecf30109d3316", + "assetType": "MODEL", + "autoSelectOptions": [], + "functionType": "AI", + "status": "Exists", + "outputValues": [ + { + "code": "data", + "dataType": "text" + } + ] + }, + { + "number": 3, + "type": "OUTPUT" + } + ] +} \ No newline at end of file diff --git a/tests/functional/pipelines/run_test.py b/tests/functional/pipelines/run_test.py index e7af6c4e..e4389587 100644 --- a/tests/functional/pipelines/run_test.py +++ b/tests/functional/pipelines/run_test.py @@ -109,24 +109,24 @@ def test_run_multipipe_with_datasets(batchmode: bool): assert response["status"] == "SUCCESS" -@pytest.mark.parametrize("batchmode", [True, False]) -def test_run_segment_reconstruct(batchmode: bool): +def test_run_segment_reconstruct(): pipeline = PipelineFactory.list(query="Segmentation/Reconstruction Functional Test - DO NOT DELETE")["results"][0] - response = pipeline.run("https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", **{"batchmode": batchmode}) + response = pipeline.run("https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav") assert response["status"] == "SUCCESS" output = response["data"][0] assert output["label"] == "Output 1" -@pytest.mark.parametrize("batchmode", [True, False]) -def test_run_metric(batchmode: bool): +def test_run_metric(): pipeline = PipelineFactory.list(query="ASR Metric Functional Test - DO NOT DELETE")["results"][0] - response = pipeline.run({ - "AudioInput": "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", - "ReferenceInput": "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.txt" - }, **{"batchmode": batchmode}) - + response = pipeline.run( + { + "AudioInput": "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", + "ReferenceInput": "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.txt", + } + ) + assert response["status"] == "SUCCESS" assert len(response["data"]) == 2 assert response["data"][0]["label"] in ["TranscriptOutput", "ScoreOutput"] @@ -134,34 +134,30 @@ def test_run_metric(batchmode: bool): @pytest.mark.parametrize( - "batchmode,input_data,output_data", + "input_data,output_data", [ - (True, "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", "AudioOutput"), - (False, "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", "AudioOutput"), - (True, "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.txt", "TextOutput"), - (False, "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.txt", "TextOutput") - ] + ("https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", "AudioOutput"), + ("https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.txt", "TextOutput"), + ], ) -def test_run_router(batchmode: bool, input_data: str, output_data: str): +def test_run_router(input_data: str, output_data: str): pipeline = PipelineFactory.list(query="Router Test - DO NOT DELETE")["results"][0] - response = pipeline.run(input_data, **{"batchmode": batchmode}) 
- + response = pipeline.run(input_data) + assert response["status"] == "SUCCESS" assert response["data"][0]["label"] == output_data @pytest.mark.parametrize( - "batchmode,input_data,output_data", + "input_data,output_data", [ - (True, "I love it.", "PositiveOutput"), - (False, "I love it.", "PositiveOutput"), - (True, "I hate it.", "NegativeOutput"), - (False, "I hate it.", "NegativeOutput") - ] + ("I love it.", "PositiveOutput"), + ("I hate it.", "NegativeOutput"), + ], ) -def test_run_decision(batchmode: bool, input_data: str, output_data: str): +def test_run_decision(input_data: str, output_data: str): pipeline = PipelineFactory.list(query="Decision Test - DO NOT DELETE")["results"][0] - response = pipeline.run(input_data, **{"batchmode": batchmode}) - + response = pipeline.run(input_data) + assert response["status"] == "SUCCESS" - assert response["data"][0]["label"] == output_data \ No newline at end of file + assert response["data"][0]["label"] == output_data diff --git a/tests/image_upload_e2e_test.py b/tests/image_upload_e2e_test.py index 5e46c325..0e2ccbc5 100644 --- a/tests/image_upload_e2e_test.py +++ b/tests/image_upload_e2e_test.py @@ -6,8 +6,10 @@ from tests.test_utils import delete_asset, delete_service_account from aixplain.utils import config import docker -import os +import pytest + +@pytest.mark.skip(reason="Model Upload is deactivated for improvements.") def test_create_and_upload_model(): # List the host machines host_response = ModelFactory.list_host_machines() @@ -44,7 +46,7 @@ def test_create_and_upload_model(): # Log into the image repository. login_response = ModelFactory.asset_repo_login() - + assert login_response["username"] == "AWS" assert login_response["registry"] == "535945872701.dkr.ecr.us-east-1.amazonaws.com" assert "password" in login_response.keys() @@ -55,12 +57,12 @@ def test_create_and_upload_model(): # Push an image to ECR # os.system("aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 535945872701.dkr.ecr.us-east-1.amazonaws.com") - low_level_client = docker.APIClient(base_url='unix://var/run/docker.sock') + low_level_client = docker.APIClient(base_url="unix://var/run/docker.sock") # low_level_client.pull("535945872701.dkr.ecr.us-east-1.amazonaws.com/bash") # low_level_client.tag("535945872701.dkr.ecr.us-east-1.amazonaws.com/bash", f"{registry}/{repo_name}") low_level_client.pull("bash") low_level_client.tag("bash", f"{registry}/{repo_name}") - low_level_client.push(f"{registry}/{repo_name}", auth_config={"username":username, "password":password}) + low_level_client.push(f"{registry}/{repo_name}", auth_config={"username": username, "password": password}) # Send an email to finalize onboarding process ModelFactory.onboard_model(model_id, "latest", "fake_hash") diff --git a/tests/image_upload_functional_test.py b/tests/image_upload_functional_test.py index 0d6aa219..b9dd3ebf 100644 --- a/tests/image_upload_functional_test.py +++ b/tests/image_upload_functional_test.py @@ -6,6 +6,8 @@ from aixplain.factories.model_factory import ModelFactory import pytest + +@pytest.mark.skip(reason="Model Upload is deactivated for improvements.") def test_login(): response = ModelFactory.asset_repo_login() assert response["username"] == "AWS" @@ -15,6 +17,8 @@ def test_login(): # Test cleanup delete_service_account(config.TEAM_API_KEY) + +@pytest.mark.skip(reason="Model Upload is deactivated for improvements.") def test_create_asset_repo(): with open(Path("tests/test_requests/create_asset_request.json")) as f: 
mock_register_payload = json.load(f) @@ -33,6 +37,8 @@ def test_create_asset_repo(): # Test cleanup delete_asset(response["id"], config.TEAM_API_KEY) + +@pytest.mark.skip(reason="Model Upload is deactivated for improvements.") def test_list_host_machines(): response = ModelFactory.list_host_machines() for hosting_machine_dict in response: @@ -42,6 +48,8 @@ def test_list_host_machines(): assert "memory" in hosting_machine_dict.keys() assert "hourlyCost" in hosting_machine_dict.keys() + +@pytest.mark.skip(reason="Model Upload is deactivated for improvements.") def test_get_functions(): # Verbose response = ModelFactory.list_functions(True) @@ -53,7 +61,7 @@ def test_get_functions(): assert "name" in item.keys() # Non-verbose - response = ModelFactory.list_functions() # Not verbose by default + response = ModelFactory.list_functions() # Not verbose by default items = response["items"] for item in items: assert "output" not in item.keys() @@ -61,6 +69,7 @@ def test_get_functions(): assert "id" not in item.keys() assert "name" in item.keys() + @pytest.mark.skip(reason="Not included in first release") def list_image_repo_tags(): response = ModelFactory.list_image_repo_tags() diff --git a/tests/image_upload_test.py b/tests/image_upload_test.py index bb120533..fb919171 100644 --- a/tests/image_upload_test.py +++ b/tests/image_upload_test.py @@ -13,8 +13,9 @@ API_FIXED_HEADER = {"x-api-key": f"{config.TEAM_API_KEY}", "Content-Type": "application/json"} +@pytest.mark.skip(reason="Model Upload is deactivated for improvements.") def test_login(): - url = urljoin(config.BACKEND_URL, f"sdk/ecr/login") + url = urljoin(config.BACKEND_URL, f"sdk/ecr/login") with requests_mock.Mocker() as mock: with open(Path("tests/mock_responses/login_response.json")) as f: mock_json = json.load(f) @@ -22,8 +23,10 @@ def test_login(): creds = ModelFactory.asset_repo_login(config.TEAM_API_KEY) assert creds == mock_json + +@pytest.mark.skip(reason="Model Upload is deactivated for improvements.") def test_create_asset_repo(): - url_register = urljoin(config.BACKEND_URL, f"sdk/models/register") + url_register = urljoin(config.BACKEND_URL, f"sdk/models/register") url_function = urljoin(config.BACKEND_URL, f"sdk/functions") with requests_mock.Mocker() as mock: with open(Path("tests/mock_responses/create_asset_repo_response.json")) as f: @@ -32,12 +35,15 @@ def test_create_asset_repo(): with open(Path("tests/mock_responses/list_functions_response.json")) as f: mock_json_functions = json.load(f) mock.get(url_function, headers=AUTH_FIXED_HEADER, json=mock_json_functions) - model_id = ModelFactory.create_asset_repo("mock_name", "mock_machines", "mock_version", - "mock_description", "Speech Recognition", "en", config.TEAM_API_KEY) + model_id = ModelFactory.create_asset_repo( + "mock_name", "mock_machines", "mock_version", "mock_description", "Speech Recognition", "en", config.TEAM_API_KEY + ) assert model_id == mock_json_register + +@pytest.mark.skip(reason="Model Upload is deactivated for improvements.") def test_list_host_machines(): - url = urljoin(config.BACKEND_URL, f"sdk/hosting-machines") + url = urljoin(config.BACKEND_URL, f"sdk/hosting-machines") with requests_mock.Mocker() as mock: with open(Path("tests/mock_responses/list_host_machines_response.json")) as f: mock_json = json.load(f) @@ -49,8 +55,10 @@ def test_list_host_machines(): for key in machine_dict.keys(): assert machine_dict[key] == mock_json_dict[key] + +@pytest.mark.skip(reason="Model Upload is deactivated for improvements.") def test_get_functions(): - url = 
urljoin(config.BACKEND_URL, f"sdk/functions") + url = urljoin(config.BACKEND_URL, f"sdk/functions") with requests_mock.Mocker() as mock: with open(Path("tests/mock_responses/list_functions_response.json")) as f: mock_json = json.load(f) @@ -58,10 +66,11 @@ def test_get_functions(): functions = ModelFactory.list_functions(config.TEAM_API_KEY) assert functions == mock_json + @pytest.mark.skip(reason="Not currently supported.") def test_list_image_repo_tags(): model_id = "mock_id" - url = urljoin(config.BACKEND_URL, f"sdk/models/{model_id}/images") + url = urljoin(config.BACKEND_URL, f"sdk/models/{model_id}/images") with requests_mock.Mocker() as mock: with open(Path("tests/mock_responses/list_image_repo_tags_response.json")) as f: mock_json = json.load(f) diff --git a/tests/unit/pipeline_test.py b/tests/unit/pipeline_test.py new file mode 100644 index 00000000..68a399aa --- /dev/null +++ b/tests/unit/pipeline_test.py @@ -0,0 +1,39 @@ +__author__ = "thiagocastroferreira" + +""" +Copyright 2022 The aiXplain SDK authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +from dotenv import load_dotenv + +load_dotenv() +import requests_mock +from aixplain.utils import config +from aixplain.factories import PipelineFactory +from aixplain.modules import Pipeline +from urllib.parse import urljoin +import pytest + + +def test_create_pipeline(): + with requests_mock.Mocker() as mock: + url = urljoin(config.BACKEND_URL, "sdk/pipelines") + headers = {"x-api-key": config.TEAM_API_KEY, "Content-Type": "application/json"} + ref_response = {"id": "12345"} + mock.post(url, headers=headers, json=ref_response) + ref_pipeline = Pipeline(id="12345", name="Pipeline Test", api_key=config.TEAM_API_KEY) + hyp_pipeline = PipelineFactory.create(pipeline={}, name="Pipeline Test") + assert hyp_pipeline.id == ref_pipeline.id + assert hyp_pipeline.name == ref_pipeline.name
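
For reference, below is a minimal usage sketch of the pipeline creation and update APIs introduced in this change. It is not part of the diff itself: the pipeline JSON path and the sample input are borrowed from the new functional tests, and `YOUR_TEAM_API_KEY_HERE` is a placeholder you must replace with a real team API key.

```python
import os

# Assumption: a valid aiXplain team API key is set before the factories are imported.
os.environ["TEAM_API_KEY"] = "YOUR_TEAM_API_KEY_HERE"

from aixplain.factories import PipelineFactory

# Create a draft pipeline from a JSON architecture file; per the new
# PipelineFactory.create(), only draft pipelines are currently supported.
pipeline = PipelineFactory.create(
    name="my-draft-pipeline",
    pipeline="tests/functional/pipelines/data/pipeline.json",
)
print(pipeline.id)

# Pipeline.update() accepts either a Python dict or a path to a .json file.
pipeline.update(pipeline="tests/functional/pipelines/data/pipeline.json")

# The sample architecture routes text through sentiment analysis and then
# translation, so plain text is a valid input (cf. the new run tests).
response = pipeline.run("I love it.")
print(response["status"])
```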