aixplain · hadi-aix · Jun 6, 2024 · Jan 15, 2024 · Jan 15, 2024 · Jan 16, 2024
diff --git a/aixplain/enums/data_type.py b/aixplain/enums/data_type.py
@@ -35,4 +35,4 @@ class DataType(Enum):
     VIDEO = "video"
 
     def __str__(self):
-        return self._value_
+        return self._value_
diff --git a/aixplain/factories/model_factory.py b/aixplain/factories/model_factory.py
@@ -65,7 +65,7 @@ def _create_model_from_response(cls, response: Dict) -> Model:
             response["name"],
             supplier=response["supplier"],
             api_key=response["api_key"],
-            pricing=response["pricing"],
+            cost=response["pricing"],
             function=Function(response["function"]["id"]),
             parameters=parameters,
             is_subscribed=True if "subscription" in response else False,
@@ -404,9 +404,11 @@ def onboard_model(cls, model_id: Text, image_tag: Text, image_hash: Text, api_ke
         message = "Your onboarding request has been submitted to an aiXplain specialist for finalization. We will notify you when the process is completed."
         logging.info(message)
         return response
-    
+
     @classmethod
-    def deploy_huggingface_model(cls, name: Text, hf_repo_id: Text, hf_token: Optional[Text] = "", api_key: Optional[Text] = None) -> Dict:
+    def deploy_huggingface_model(
+        cls, name: Text, hf_repo_id: Text, hf_token: Optional[Text] = "", api_key: Optional[Text] = None
+    ) -> Dict:
         """Onboards and deploys a Hugging Face large language model.
 
         Args:
@@ -433,20 +435,16 @@ def deploy_huggingface_model(cls, name: Text, hf_repo_id: Text, hf_token: Option
                 "sourceLanguage": "en",
             },
             "source": "huggingface",
-            "onboardingParams": {
-                "hf_model_name": model_name,
-                "hf_supplier": supplier,
-                "hf_token": hf_token
-            }
+            "onboardingParams": {"hf_model_name": model_name, "hf_supplier": supplier, "hf_token": hf_token},
         }
         response = _request_with_retry("post", deploy_url, headers=headers, json=body)
         logging.debug(response.text)
         response_dicts = json.loads(response.text)
         return response_dicts
-    
+
     @classmethod
     def get_huggingface_model_status(cls, model_id: Text, api_key: Optional[Text] = None):
-        """Gets the on-boarding status of a Hugging Face model with ID MODEL_ID. 
+        """Gets the on-boarding status of a Hugging Face model with ID MODEL_ID.
 
         Args:
             model_id (Text): The model's ID as returned by DEPLOY_HUGGINGFACE_MODEL
@@ -466,6 +464,6 @@ def get_huggingface_model_status(cls, model_id: Text, api_key: Optional[Text] =
             "status": response_dicts["status"],
             "name": response_dicts["name"],
             "id": response_dicts["id"],
-            "pricing": response_dicts["pricing"]
+            "pricing": response_dicts["pricing"],
         }
-        return ret_dict
+        return ret_dict
diff --git a/aixplain/factories/pipeline_factory.py b/aixplain/factories/pipeline_factory.py
@@ -22,6 +22,7 @@
 """
 import json
 import logging
+import os
 from typing import Dict, List, Optional, Text, Union
 from aixplain.enums.data_type import DataType
 from aixplain.enums.function import Function
@@ -207,7 +208,7 @@ def list(
                 output_data_types = [output_data_types]
             payload["inputDataTypes"] = [data_type.value for data_type in output_data_types]
 
-        logging.info(f"Start service for POST List Dataset - {url} - {headers} - {json.dumps(payload)}")
+        logging.info(f"Start service for POST List Pipeline - {url} - {headers} - {json.dumps(payload)}")
         r = _request_with_retry("post", url, headers=headers, json=payload)
         resp = r.json()
 
@@ -220,3 +221,40 @@ def list(
             for pipeline in results:
                 pipelines.append(cls.__from_response(pipeline))
         return {"results": pipelines, "page_total": page_total, "page_number": page_number, "total": total}
+
+    @classmethod
+    def create(cls, name: Text, pipeline: Union[Text, Dict], status: Text = "draft") -> Pipeline:
+        """Submit a new draft pipeline to the backend with a POST request and return it.
+
+        Args:
+            name (Text): Pipeline name.
+            pipeline (Union[Text, Dict]): Pipeline architecture as a dictionary, or the path to a JSON file holding it.
+            status (Text, optional): Status of the pipeline. Only "draft" is accepted. Defaults to "draft".
+
+        Raises:
+            Exception: If `status` is not "draft", the path is not an existing ".json" file, or the request or its response handling fails.
+
+        Returns:
+            Pipeline: Instance of the new pipeline, built from the "id" field of the backend response.
+        """
+        try:
+            # Validate with explicit raises: `assert` statements are removed when Python runs with -O.
+            if status != "draft":
+                raise ValueError("Pipeline Creation Error: Currently just the creation of draft pipelines are supported.")
+            if isinstance(pipeline, str):
+                if not os.path.exists(pipeline) or os.path.splitext(pipeline)[1] != ".json":
+                    raise ValueError("Pipeline Creation Error: Make sure the pipeline to be save is in a JSON file.")
+                with open(pipeline, encoding="utf-8") as f:
+                    pipeline = json.load(f)
+
+            payload = {"name": name, "status": status, "architecture": pipeline}
+            url = urljoin(cls.backend_url, "sdk/pipelines")
+            headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"}
+            # The headers carry the team API key, so they are deliberately kept out of the log line.
+            logging.info(f"Start service for POST Create Pipeline - {url} - {json.dumps(payload)}")
+            r = _request_with_retry("post", url, headers=headers, json=payload)
+            response = r.json()
+            return Pipeline(response["id"], name, config.TEAM_API_KEY)
+        except Exception as e:
+            # Every failure surfaces as a plain Exception with the original message; `from e` keeps the cause.
+            raise Exception(e) from e
diff --git a/aixplain/modules/asset.py b/aixplain/modules/asset.py
@@ -36,7 +36,7 @@ def __init__(
         version: Text = "1.0",
         license: Optional[License] = None,
         privacy: Privacy = Privacy.PRIVATE,
-        cost: float = 0,
+        cost: Optional[Union[Dict, float]] = None,
     ) -> None:
         """Create an Asset with the necessary information
 
@@ -46,6 +46,7 @@ def __init__(
             description (Text): Description of the Asset
             supplier (Union[Dict, Text, Supplier, int], optional): supplier of the asset. Defaults to "aiXplain".
             version (Optional[Text], optional): asset version. Defaults to "1.0".
+            cost (Optional[Union[Dict, float]], optional): asset price. Defaults to None.
         """
         self.id = id
         self.name = name

diff --git a/aixplain/modules/metadata.py b/aixplain/modules/metadata.py
@@ -43,6 +43,7 @@ def __init__(
         file_extension: Optional[FileType] = None,
         languages: List[Language] = [],
         dsubtype: DataSubtype = DataSubtype.OTHER,
+        id: Optional[Text] = None,
         **kwargs
     ) -> None:
         """MetaData Class
@@ -62,6 +63,7 @@ def __init__(
             file_extension (Optional[FileType], optional): File extension (e.g. CSV, TXT, etc.). Defaults to None.
             languages (List[Language], optional): List of languages which the data consists of. Defaults to [].
             dsubtype (DataSubtype, optional): Data subtype (e.g., age, topic, race, split, etc.), used in datasets metadata. Defaults to Other.
+            id (Optional[Text], optional): Data ID. Defaults to None.
         """
         self.name = name
         if isinstance(dtype, str):
@@ -91,4 +93,5 @@ def __init__(
                 language = Language(language)
             self.languages.append(language)
         self.dsubtype = dsubtype
+        self.id = id
         self.kwargs = kwargs
diff --git a/aixplain/modules/metric.py b/aixplain/modules/metric.py
@@ -61,12 +61,10 @@ def __init__(
             supplier (Text): author of the Metric
             is_reference_required (bool): does the metric use reference
             is_source_required (bool): does the metric use source
-            cost (float): cost of the metric
+            cost (float): price of the metric
             normalization_options(list, [])
             **additional_info: Any additional Metric info to be saved
         """
-
-
         super().__init__(id, name, description="", supplier=supplier, version="1.0", cost=cost)
         self.is_source_required = is_source_required
         self.is_reference_required = is_reference_required
@@ -76,7 +74,7 @@ def __init__(
 
     def __repr__(self) -> str:
         return f"<Metric {self.name}>"
-    
+
     def add_normalization_options(self, normalization_options: List[str]):
         """Add a given set of normalization options to be used while benchmarking
 
@@ -85,7 +83,12 @@ def add_normalization_options(self, normalization_options: List[str]):
         """
         self.normalization_options.append(normalization_options)
 
-    def run(self, hypothesis: Optional[Union[str, List[str]]]=None, source: Optional[Union[str, List[str]]]=None, reference: Optional[Union[str, List[str]]]=None):
+    def run(
+        self,
+        hypothesis: Optional[Union[str, List[str]]] = None,
+        source: Optional[Union[str, List[str]]] = None,
+        reference: Optional[Union[str, List[str]]] = None,
+    ):
         """Run the metric to calculate the scores.
 
         Args:
@@ -94,6 +97,7 @@ def run(self, hypothesis: Optional[Union[str, List[str]]]=None, source: Optional
             reference (Optional[Union[str, List[str]]], optional): Can give a single reference or a list of references for metric calculation. Defaults to None.
         """
         from aixplain.factories.model_factory import ModelFactory
+
         model = ModelFactory.get(self.id)
         payload = {
             "function": self.function,
@@ -115,4 +119,3 @@ def run(self, hypothesis: Optional[Union[str, List[str]]]=None, source: Optional
                 reference = [[ref] for ref in reference]
             payload["references"] = reference
         return model.run(payload)
-
diff --git a/aixplain/modules/model.py b/aixplain/modules/model.py
@@ -48,6 +48,7 @@ class Model(Asset):
         function (Text, optional): model AI function. Defaults to None.
         url (str): URL to run the model.
         backend_url (str): URL of the backend.
+        cost (Dict, optional): model price. Defaults to None.
         **additional_info: Any additional Model info to be saved
     """
 
@@ -61,6 +62,7 @@ def __init__(
         version: Optional[Text] = None,
         function: Optional[Text] = None,
         is_subscribed: bool = False,
+        cost: Optional[Dict] = None,
         **additional_info,
     ) -> None:
         """Model Init
@@ -74,9 +76,10 @@ def __init__(
             version (Text, optional): version of the model. Defaults to "1.0".
             function (Text, optional): model AI function. Defaults to None.
             is_subscribed (bool, optional): Is the user subscribed. Defaults to False.
+            cost (Dict, optional): model price. Defaults to None.
             **additional_info: Any additional Model info to be saved
         """
-        super().__init__(id, name, description, supplier, version)
+        super().__init__(id, name, description, supplier, version, cost=cost)
         self.api_key = api_key
         self.additional_info = additional_info
         self.url = config.MODELS_RUN_URL
@@ -264,6 +267,7 @@ def check_finetune_status(self, after_epoch: Optional[int] = None):
         """
         from aixplain.enums.asset_status import AssetStatus
         from aixplain.modules.finetune.status import FinetuneStatus
+
         headers = {"x-api-key": self.api_key, "Content-Type": "application/json"}
         resp = None
         try:
@@ -274,15 +278,15 @@ def check_finetune_status(self, after_epoch: Optional[int] = None):
             finetune_status = AssetStatus(resp["finetuneStatus"])
             model_status = AssetStatus(resp["modelStatus"])
             logs = sorted(resp["logs"], key=lambda x: float(x["epoch"]))
-            
+
             target_epoch = None
             if after_epoch is not None:
                 logs = [log for log in logs if float(log["epoch"]) > after_epoch]
                 if len(logs) > 0:
                     target_epoch = float(logs[0]["epoch"])
             elif len(logs) > 0:
                 target_epoch = float(logs[-1]["epoch"])
-            
+
             if target_epoch is not None:
                 log = None
                 for log_ in logs:
@@ -294,7 +298,7 @@ def check_finetune_status(self, after_epoch: Optional[int] = None):
                                 log["trainLoss"] = log_["trainLoss"]
                             if log_["evalLoss"] is not None:
                                 log["evalLoss"] = log_["evalLoss"]
-                
+
                 status = FinetuneStatus(
                     status=finetune_status,
                     model_status=model_status,

diff --git a/aixplain/modules/pipeline.py b/aixplain/modules/pipeline.py
@@ -23,11 +23,13 @@
 
 import time
 import json
+import os
 import logging
 from aixplain.modules.asset import Asset
 from aixplain.utils import config
 from aixplain.utils.file_utils import _request_with_retry
 from typing import Dict, Optional, Text, Union
+from urllib.parse import urljoin
 
 
 class Pipeline(Asset):
@@ -306,3 +308,32 @@ def run_async(
             if resp is not None:
                 response["error"] = resp
         return response
+
+    def update(self, pipeline: Union[Text, Dict]):
+        """Send a new architecture for this pipeline to the backend with a PUT request, with status "draft".
+
+        Args:
+            pipeline (Union[Text, Dict]): Pipeline architecture as a dictionary, or the path to a JSON file holding it.
+
+        Raises:
+            Exception: If the path is not an existing ".json" file, or the request or its response handling fails.
+        """
+        try:
+            if isinstance(pipeline, str):
+                # Validate with an explicit raise: `assert` statements are removed when Python runs with -O.
+                if not os.path.exists(pipeline) or os.path.splitext(pipeline)[1] != ".json":
+                    raise ValueError("Pipeline Update Error: Make sure the pipeline to be save is in a JSON file.")
+                with open(pipeline, encoding="utf-8") as f:
+                    pipeline = json.load(f)
+
+            payload = {"name": self.name, "status": "draft", "architecture": pipeline}
+            url = urljoin(config.BACKEND_URL, f"sdk/pipelines/{self.id}")
+            headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"}
+            # The headers carry the team API key, so they are deliberately kept out of the log line.
+            logging.info(f"Start service for PUT Update Pipeline - {url} - {json.dumps(payload)}")
+            r = _request_with_retry("put", url, headers=headers, json=payload)
+            response = r.json()
+            logging.info(f"Pipeline {response['id']} Updated.")
+        except Exception as e:
+            # Every failure surfaces as a plain Exception with the original message; `from e` keeps the cause.
+            raise Exception(e) from e
diff --git a/aixplain/processes/data_onboarding/onboard_functions.py b/aixplain/processes/data_onboarding/onboard_functions.py
@@ -97,11 +97,11 @@ def process_data_files(
         -1,
         0,
     )
-    if metadata.dtype in [DataType.AUDIO, DataType.IMAGE] or metadata.dsubtype == DataSubtype.INTERVAL:
+    if metadata.dtype in [DataType.AUDIO, DataType.IMAGE, DataType.LABEL] or metadata.dsubtype == DataSubtype.INTERVAL:
         files, data_column_idx, start_column_idx, end_column_idx, nrows = process_media_files.run(
             metadata=metadata, paths=paths, folder=folder
         )
-    elif metadata.dtype in [DataType.TEXT, DataType.LABEL]:
+    elif metadata.dtype in [DataType.TEXT]:
         files, data_column_idx, nrows = process_text_files.run(metadata=metadata, paths=paths, folder=folder)
     return files, data_column_idx, start_column_idx, end_column_idx, nrows
 

diff --git a/aixplain/processes/data_onboarding/process_media_files.py b/aixplain/processes/data_onboarding/process_media_files.py
@@ -5,6 +5,7 @@
 import pandas as pd
 import shutil
 import tarfile
+import validators
 
 from aixplain.enums.data_subtype import DataSubtype
 from aixplain.enums.data_type import DataType
@@ -16,6 +17,7 @@
 from pathlib import Path
 from tqdm import tqdm
 from typing import List, Tuple
+from urllib.parse import urlparse
 
 AUDIO_MAX_SIZE = 50000000
 IMAGE_TEXT_MAX_SIZE = 25000000
@@ -45,6 +47,11 @@ def run(metadata: MetaData, paths: List, folder: Path, batch_size: int = 100) ->
     Returns:
         Tuple[List[File], int, int, int]: list of s3 links; data, start and end columns index, and number of rows
     """
+    if metadata.dtype != DataType.LABEL:
+        assert (
+            metadata.storage_type != StorageType.TEXT
+        ), f'Data Asset Onboarding Error: Column "{metadata.name}" of type "{metadata.dtype}" can not be stored in text.'
+
     # if files are stored locally, create a folder to store it
     media_folder = Path(".")
     if metadata.storage_type == StorageType.FILE:
@@ -95,6 +102,10 @@ def run(metadata: MetaData, paths: List, folder: Path, batch_size: int = 100) ->
                     assert (
                         os.path.getsize(media_path) <= AUDIO_MAX_SIZE
                     ), f'Data Asset Onboarding Error: Local audio file "{media_path}" exceeds the size limit of 50 MB.'
+                elif metadata.dtype == DataType.LABEL:
+                    assert (
+                        os.path.getsize(media_path) <= IMAGE_TEXT_MAX_SIZE
+                    ), f'Data Asset Onboarding Error: Local label file "{media_path}" exceeds the size limit of 25 MB.'
                 else:
                     assert (
                         os.path.getsize(media_path) <= IMAGE_TEXT_MAX_SIZE
@@ -105,6 +116,13 @@ def run(metadata: MetaData, paths: List, folder: Path, batch_size: int = 100) ->
                     shutil.copy2(media_path, new_path)
                 batch.append(fname)
             else:
+                if metadata.storage_type == StorageType.TEXT and (
+                    str(media_path).startswith("s3://")
+                    or str(media_path).startswith("http://")
+                    or str(media_path).startswith("https://")
+                    or validators.url(media_path)
+                ):
+                    media_path = "DONOTDOWNLOAD" + str(media_path)
                 batch.append(media_path)
 
             # crop intervals can not be used with interval data types

diff --git a/aixplain/processes/data_onboarding/process_text_files.py b/aixplain/processes/data_onboarding/process_text_files.py
@@ -26,7 +26,7 @@ def process_text(content: str, storage_type: StorageType) -> Text:
         Text: textual content
     """
     if storage_type == StorageType.FILE:
-        # Check the size of file and assert a limit of 50 MB
+        # Check the size of file and assert a limit of 25 MB
         assert (
             os.path.getsize(content) <= 25000000
         ), f'Data Asset Onboarding Error: Local text file "{content}" exceeds the size limit of 25 MB.'

diff --git a/docs/assets/architecture.png b/docs/assets/architecture.png