From b314dff233f24a8d433bbfdec6848af55ebee933 Mon Sep 17 00:00:00 2001 From: Abdelrahman Elsheikh Date: Wed, 6 Aug 2025 16:39:56 +0300 Subject: [PATCH 1/2] Add/improve docstrings according to Google style for all classes, functions, and methods --- aixplain/base/parameters.py | 54 +++- aixplain/decorators/api_key_checker.py | 20 ++ aixplain/enums/asset_status.py | 26 ++ aixplain/enums/code_interpeter.py | 17 +- aixplain/enums/data_split.py | 10 + aixplain/enums/data_subtype.py | 22 +- aixplain/enums/data_type.py | 25 +- aixplain/enums/database_source.py | 11 +- aixplain/enums/embedding_model.py | 18 +- aixplain/enums/file_type.py | 23 ++ aixplain/enums/function.py | 62 ++++- aixplain/enums/function_type.py | 18 ++ aixplain/enums/index_stores.py | 25 +- aixplain/enums/language.py | 42 ++- aixplain/enums/license.py | 41 ++- aixplain/enums/onboard_status.py | 11 + aixplain/enums/ownership_type.py | 14 + aixplain/enums/privacy.py | 10 + aixplain/enums/response_status.py | 15 + aixplain/enums/sort_by.py | 10 + aixplain/enums/sort_order.py | 9 + aixplain/enums/splitting_options.py | 17 ++ aixplain/enums/status.py | 10 + aixplain/enums/storage_type.py | 15 + aixplain/enums/supplier.py | 28 +- aixplain/exceptions/types.py | 69 ++++- aixplain/factories/agent_factory/__init__.py | 103 ++++++- aixplain/factories/agent_factory/utils.py | 34 ++- aixplain/factories/api_key_factory.py | 104 ++++++- aixplain/factories/asset_factory.py | 10 +- aixplain/factories/benchmark_factory.py | 140 +++++++--- aixplain/factories/cli/model_factory_cli.py | 121 +++++--- aixplain/factories/corpus_factory.py | 146 +++++++--- aixplain/factories/data_factory.py | 40 ++- aixplain/factories/dataset_factory.py | 170 +++++++++--- aixplain/factories/file_factory.py | 99 +++++-- .../factories/finetune_factory/__init__.py | 47 +++- .../finetune_factory/prompt_validator.py | 35 ++- aixplain/factories/index_factory/__init__.py | 72 ++++- aixplain/factories/index_factory/utils.py | 106 ++++++- 
aixplain/factories/integration_factory.py | 36 +++ aixplain/factories/metric_factory.py | 44 ++- aixplain/factories/model_factory/__init__.py | 106 ++++--- .../model_factory/mixins/model_getter.py | 40 ++- .../model_factory/mixins/model_list.py | 59 +++- aixplain/factories/model_factory/utils.py | 76 ++++- .../factories/pipeline_factory/__init__.py | 129 +++++++-- aixplain/factories/pipeline_factory/utils.py | 26 +- aixplain/factories/script_factory.py | 18 ++ .../factories/team_agent_factory/__init__.py | 67 ++++- .../team_agent_factory/inspector_factory.py | 77 ++++-- .../factories/team_agent_factory/utils.py | 35 ++- aixplain/factories/tool_factory.py | 31 ++- aixplain/factories/wallet_factory.py | 23 +- aixplain/modules/agent/__init__.py | 145 +++++++--- aixplain/modules/agent/agent_response.py | 77 ++++++ aixplain/modules/agent/agent_response_data.py | 89 +++++- aixplain/modules/agent/agent_task.py | 36 ++- aixplain/modules/agent/model_with_params.py | 30 ++ aixplain/modules/agent/output_format.py | 10 + aixplain/modules/agent/tool/__init__.py | 37 ++- .../agent/tool/custom_python_code_tool.py | 56 +++- aixplain/modules/agent/tool/model_tool.py | 85 +++++- aixplain/modules/agent/tool/pipeline_tool.py | 56 +++- .../agent/tool/python_interpreter_tool.py | 44 ++- aixplain/modules/agent/tool/sql_tool.py | 180 ++++++++++-- aixplain/modules/agent/utils.py | 18 ++ aixplain/modules/api_key.py | 245 +++++++++++++++- aixplain/modules/asset.py | 43 ++- aixplain/modules/benchmark.py | 31 ++- aixplain/modules/benchmark_job.py | 117 +++++++- aixplain/modules/content_interval.py | 75 +++++ aixplain/modules/corpus.py | 44 ++- aixplain/modules/data.py | 59 ++-- aixplain/modules/dataset.py | 22 +- aixplain/modules/file.py | 17 +- aixplain/modules/finetune/__init__.py | 85 +++--- aixplain/modules/finetune/cost.py | 12 + aixplain/modules/finetune/hyperparameters.py | 41 +++ aixplain/modules/finetune/status.py | 12 + aixplain/modules/metadata.py | 26 +- aixplain/modules/metric.py 
| 86 ++++-- aixplain/modules/model/__init__.py | 261 +++++++++++++----- aixplain/modules/model/connection.py | 92 +++++- aixplain/modules/model/index_model.py | 111 +++++++- aixplain/modules/model/integration.py | 137 +++++++-- aixplain/modules/model/llm_model.py | 117 +++++--- aixplain/modules/model/mcp_connection.py | 85 +++++- .../modules/model/model_response_streamer.py | 23 +- aixplain/modules/model/record.py | 25 +- aixplain/modules/model/response.py | 69 ++++- aixplain/modules/model/utility_model.py | 145 ++++++++-- aixplain/modules/model/utils.py | 137 ++++++++- aixplain/modules/pipeline/asset.py | 94 ++++++- aixplain/modules/pipeline/default.py | 12 + aixplain/modules/pipeline/response.py | 84 ++++++ aixplain/modules/team_agent/__init__.py | 103 ++++++- aixplain/modules/team_agent/inspector.py | 40 +++ aixplain/modules/wallet.py | 19 +- .../data_onboarding/onboard_functions.py | 182 +++++++++--- .../data_onboarding/process_media_files.py | 57 +++- .../data_onboarding/process_text_files.py | 67 ++++- aixplain/utils/asset_cache.py | 162 ++++++++++- aixplain/utils/cache_utils.py | 47 +++- aixplain/utils/convert_datatype_utils.py | 6 + aixplain/utils/file_utils.py | 123 +++++++-- aixplain/utils/llm_utils.py | 7 +- aixplain/utils/request_utils.py | 4 +- aixplain/utils/validation_utils.py | 67 ++++- 109 files changed, 5782 insertions(+), 888 deletions(-) diff --git a/aixplain/base/parameters.py b/aixplain/base/parameters.py index ea0cd895..def50f64 100644 --- a/aixplain/base/parameters.py +++ b/aixplain/base/parameters.py @@ -4,14 +4,33 @@ @dataclass class Parameter: + """A class representing a single parameter with its properties. + + Attributes: + name (str): The name of the parameter. + required (bool): Whether the parameter is required or optional. + value (Optional[Any]): The value of the parameter. Defaults to None. + """ name: str required: bool value: Optional[Any] = None class BaseParameters: + """A base class for managing a collection of parameters. 
+ + This class provides functionality to store, access, and manipulate parameters + in a structured way. Parameters can be accessed using attribute syntax or + dictionary-style access. + + Attributes: + parameters (Dict[str, Parameter]): Dictionary storing Parameter objects. + """ def __init__(self) -> None: - """Initialize base parameters class""" + """Initialize the BaseParameters class. + + The initialization creates an empty dictionary to store parameters. + """ self.parameters: Dict[str, Parameter] = {} def get_parameter(self, name: str) -> Optional[Parameter]: @@ -34,10 +53,14 @@ def to_dict(self) -> Dict[str, Dict[str, Any]]: return {param.name: {"required": param.required, "value": param.value} for param in self.parameters.values()} def to_list(self) -> List[str]: - """Convert parameters back to list format. + """Convert parameters to a list format. + + This method creates a list of dictionaries containing the name and value + of each parameter that has a value set. Returns: - List[str]: List representation of parameters + List[str]: A list of dictionaries, each containing 'name' and 'value' + keys for parameters that have values set. """ return [{"name": param.name, "value": param.value} for param in self.parameters.values() if param.value is not None] @@ -59,11 +82,18 @@ def __str__(self) -> str: return "\n".join(lines) def __setattr__(self, name: str, value: Any) -> None: - """Allow setting parameters using attribute syntax (e.g., params.text = "Hello"). + """Allow setting parameters using attribute syntax. + + This special method enables setting parameter values using attribute syntax + (e.g., params.text = "Hello"). It only works for parameters that have been + previously defined. Args: - name (str): Name of the parameter - value (Any): Value to set for the parameter + name (str): Name of the parameter to set. + value (Any): Value to assign to the parameter. + + Raises: + AttributeError: If attempting to set a parameter that hasn't been defined. 
""" if name == "parameters": # Allow setting the parameters dict normally super().__setattr__(name, value) @@ -75,16 +105,20 @@ def __setattr__(self, name: str, value: Any) -> None: raise AttributeError(f"Parameter '{name}' is not defined") def __getattr__(self, name: str) -> Any: - """Allow getting parameter values using attribute syntax (e.g., params.text). + """Allow getting parameter values using attribute syntax. + + This special method enables accessing parameter values using attribute syntax + (e.g., params.text). It only works for parameters that have been previously + defined. Args: - name (str): Name of the parameter + name (str): Name of the parameter to access. Returns: - Any: Value of the parameter + Any: The value of the requested parameter. Raises: - AttributeError: If parameter is not defined + AttributeError: If attempting to access a parameter that hasn't been defined. """ if name in self.parameters: return self.parameters[name].value diff --git a/aixplain/decorators/api_key_checker.py b/aixplain/decorators/api_key_checker.py index 9fb317cb..b4e7cf84 100644 --- a/aixplain/decorators/api_key_checker.py +++ b/aixplain/decorators/api_key_checker.py @@ -2,6 +2,26 @@ def check_api_key(method): + """Decorator to verify that an API key is set before executing the method. + + This decorator checks if either TEAM_API_KEY or AIXPLAIN_API_KEY is set in the + configuration. If neither key is set, it raises an exception. + + Args: + method (callable): The method to be decorated. + + Returns: + callable: The wrapped method that includes API key verification. + + Raises: + Exception: If neither TEAM_API_KEY nor AIXPLAIN_API_KEY is set. 
+ + Example: + @check_api_key + def my_api_method(): + # Method implementation + pass + """ def wrapper(*args, **kwargs): if config.TEAM_API_KEY == "" and config.AIXPLAIN_API_KEY == "": raise Exception( diff --git a/aixplain/enums/asset_status.py b/aixplain/enums/asset_status.py index 3d1e4323..e4357162 100644 --- a/aixplain/enums/asset_status.py +++ b/aixplain/enums/asset_status.py @@ -26,6 +26,32 @@ class AssetStatus(Text, Enum): + """Enumeration of possible status values for an asset in the aiXplain system. + + This enum defines all possible states that an asset can be in throughout its lifecycle, + from creation to deletion. Each enum value corresponds to a specific state in the + asset's lifecycle. + + Attributes: + DRAFT (str): Initial state for a newly created asset. + HIDDEN (str): Asset is hidden from public view. + SCHEDULED (str): Asset is scheduled for processing. + ONBOARDING (str): Asset is in the process of being onboarded. + ONBOARDED (str): Asset has been successfully onboarded. + PENDING (str): Asset is waiting for processing. + FAILED (str): Asset processing has failed. + TRAINING (str): Asset is currently in training. + REJECTED (str): Asset has been rejected. + ENABLING (str): Asset is in the process of being enabled. + DELETING (str): Asset is in the process of being deleted. + DISABLED (str): Asset has been disabled. + DELETED (str): Asset has been deleted. + IN_PROGRESS (str): Asset is currently being processed. + COMPLETED (str): Asset has completed processing. + CANCELING (str): Asset operation is being canceled. + CANCELED (str): Asset operation has been canceled. + DEPRECATED_DRAFT (str): Draft state that has been deprecated. 
+ """ DRAFT = "draft" HIDDEN = "hidden" SCHEDULED = "scheduled" diff --git a/aixplain/enums/code_interpeter.py b/aixplain/enums/code_interpeter.py index 9f0a14c6..dc8621a1 100644 --- a/aixplain/enums/code_interpeter.py +++ b/aixplain/enums/code_interpeter.py @@ -2,9 +2,22 @@ class CodeInterpreterModel(str, Enum): - """Code Interpreter Model IDs""" + """Enumeration of available Code Interpreter model identifiers. + + This enum defines the unique identifiers for different code interpreter models + available in the system. Each value represents a specific model's ID that can + be used for code interpretation tasks. + + Attributes: + PYTHON_AZURE (str): Model ID for the Python code interpreter running on Azure. + """ PYTHON_AZURE = "67476fa16eb563d00060ad62" - def __str__(self): + def __str__(self) -> str: + """Return the string representation of the model ID. + + Returns: + str: The model ID value as a string. + """ return self._value_ diff --git a/aixplain/enums/data_split.py b/aixplain/enums/data_split.py index a3a297f4..04be2fdc 100644 --- a/aixplain/enums/data_split.py +++ b/aixplain/enums/data_split.py @@ -25,6 +25,16 @@ class DataSplit(Enum): + """Enumeration of dataset split types. + + This enum defines the standard dataset split types used for machine learning tasks, + including training, validation, and testing splits. + + Attributes: + TRAIN (str): Training dataset split used for model training. + VALIDATION (str): Validation dataset split used for model tuning. + TEST (str): Test dataset split used for final model evaluation. + """ TRAIN = "train" VALIDATION = "validation" TEST = "test" diff --git a/aixplain/enums/data_subtype.py b/aixplain/enums/data_subtype.py index ef11cab5..c518c3e1 100644 --- a/aixplain/enums/data_subtype.py +++ b/aixplain/enums/data_subtype.py @@ -25,6 +25,21 @@ class DataSubtype(Enum): + """Enumeration of data subtypes for categorizing and organizing data. 
+ + This enum defines various subtypes that can be used to further categorize + data points within the system, particularly useful for demographic and + content-based categorization. + + Attributes: + AGE (str): Age category subtype. + GENDER (str): Gender category subtype. + INTERVAL (str): Time interval subtype. + OTHER (str): Miscellaneous/other subtype. + RACE (str): Race/ethnicity category subtype. + SPLIT (str): Data split category subtype. + TOPIC (str): Content topic subtype. + """ AGE = "age" GENDER = "gender" INTERVAL = "interval" @@ -33,5 +48,10 @@ class DataSubtype(Enum): SPLIT = "split" TOPIC = "topic" - def __str__(self): + def __str__(self) -> str: + """Return the string representation of the data subtype. + + Returns: + str: The data subtype value as a string. + """ return self._value_ diff --git a/aixplain/enums/data_type.py b/aixplain/enums/data_type.py index dcae0422..aa346a65 100644 --- a/aixplain/enums/data_type.py +++ b/aixplain/enums/data_type.py @@ -25,6 +25,24 @@ class DataType(str, Enum): + """Enumeration of supported data types in the aiXplain system. + + This enum defines all the data types that can be processed by the system, + including various media types and basic data types. + + Attributes: + AUDIO (str): Audio data type. + FLOAT (str): Floating-point number data type. + IMAGE (str): Image data type. + INTEGER (str): Integer number data type. + LABEL (str): Label/category data type. + TENSOR (str): Tensor/multi-dimensional array data type. + TEXT (str): Text data type. + VIDEO (str): Video data type. + EMBEDDING (str): Vector embedding data type. + NUMBER (str): Generic number data type. + BOOLEAN (str): Boolean data type. + """ AUDIO = "audio" FLOAT = "float" IMAGE = "image" @@ -37,5 +55,10 @@ class DataType(str, Enum): NUMBER = "number" BOOLEAN = "boolean" - def __str__(self): + def __str__(self) -> str: + """Return the string representation of the data type. + + Returns: + str: The data type value as a string. 
+ """ return self._value_ diff --git a/aixplain/enums/database_source.py b/aixplain/enums/database_source.py index 7c5eaa67..4de42e4f 100644 --- a/aixplain/enums/database_source.py +++ b/aixplain/enums/database_source.py @@ -25,7 +25,16 @@ class DatabaseSourceType(Enum): - """Enum for database source types""" + """Enumeration of supported database source types. + + This enum defines the different types of database sources that can be used + for data storage and retrieval in the system. + + Attributes: + POSTGRESQL (str): PostgreSQL database source type. + SQLITE (str): SQLite database source type. + CSV (str): CSV file source type. + """ POSTGRESQL = "postgresql" SQLITE = "sqlite" diff --git a/aixplain/enums/embedding_model.py b/aixplain/enums/embedding_model.py index 4e77fa22..a0452902 100644 --- a/aixplain/enums/embedding_model.py +++ b/aixplain/enums/embedding_model.py @@ -21,10 +21,26 @@ class EmbeddingModel(str, Enum): + """Enumeration of available embedding models in the aiXplain system. + + This enum defines the unique identifiers for different embedding models that can + be used to generate vector representations of data. + + Attributes: + OPENAI_ADA002 (str): OpenAI's Ada-002 text embedding model ID. + JINA_CLIP_V2_MULTIMODAL (str): Jina CLIP v2 multimodal embedding model ID. + MULTILINGUAL_E5_LARGE (str): Multilingual E5 Large text embedding model ID. + BGE_M3 (str): BGE-M3 embedding model ID. + """ OPENAI_ADA002 = "6734c55df127847059324d9e" JINA_CLIP_V2_MULTIMODAL = "67c5f705d8f6a65d6f74d732" MULTILINGUAL_E5_LARGE = "67efd0772a0a850afa045af3" BGE_M3 = "67efd4f92a0a850afa045af7" - def __str__(self): + def __str__(self) -> str: + """Return the string representation of the embedding model ID. + + Returns: + str: The model ID value as a string. 
+ """ return self._value_ diff --git a/aixplain/enums/file_type.py b/aixplain/enums/file_type.py index 5249ffb6..78ea46b7 100644 --- a/aixplain/enums/file_type.py +++ b/aixplain/enums/file_type.py @@ -25,6 +25,29 @@ class FileType(Enum): + """Enumeration of supported file types in the aiXplain system. + + This enum defines the file extensions for various file formats that can be + processed by the system, including document, audio, image, and video formats. + + Attributes: + CSV (str): Comma-separated values file (.csv). + JSON (str): JSON document file (.json). + TXT (str): Plain text file (.txt). + XML (str): XML document file (.xml). + FLAC (str): Free Lossless Audio Codec file (.flac). + MP3 (str): MP3 audio file (.mp3). + WAV (str): Waveform audio file (.wav). + JPEG (str): JPEG image file (.jpeg). + PNG (str): Portable Network Graphics file (.png). + JPG (str): JPEG image file (.jpg). + GIF (str): Graphics Interchange Format file (.gif). + WEBP (str): WebP image file (.webp). + AVI (str): Audio Video Interleave file (.avi). + MP4 (str): MPEG-4 video file (.mp4). + MOV (str): QuickTime movie file (.mov). + MPEG4 (str): MPEG-4 video file (.mpeg4). + """ CSV = ".csv" JSON = ".json" TXT = ".txt" diff --git a/aixplain/enums/function.py b/aixplain/enums/function.py index 462f2e38..7d4d998c 100644 --- a/aixplain/enums/function.py +++ b/aixplain/enums/function.py @@ -38,6 +38,19 @@ @dataclass class FunctionMetadata: + """Metadata container for function information. + + This class holds metadata about a function including its identifier, name, + description, parameters, outputs, and additional metadata. + + Attributes: + id (str): ID of the function. + name (str): Name of the function. + description (Optional[str]): Description of what the function does. + params (List[Dict[str, Any]]): List of parameter specifications. + output (List[Dict[str, Any]]): List of output specifications. + metadata (Dict[str, Any]): Additional metadata about the function. 
+ """ id: str name: str description: Optional[str] = None @@ -46,6 +59,11 @@ class FunctionMetadata: metadata: Dict[str, Any] = field(default_factory=dict) def to_dict(self) -> dict: + """Convert the function metadata to a dictionary. + + Returns: + dict: Dictionary representation of the function metadata. + """ return { "id": self.id, "name": self.name, @@ -56,7 +74,15 @@ def to_dict(self) -> dict: } @classmethod - def from_dict(cls, data: dict): + def from_dict(cls, data: dict) -> "FunctionMetadata": + """Create a FunctionMetadata instance from a dictionary. + + Args: + data (dict): Dictionary containing function metadata. + + Returns: + FunctionMetadata: New instance created from the dictionary data. + """ return cls( id=data.get("id"), name=data.get("name"), @@ -67,7 +93,21 @@ def from_dict(cls, data: dict): ) -def load_functions(): +def load_functions() -> Tuple[Enum, Dict]: + """Load function definitions from the backend or cache. + + This function attempts to load function definitions from the cache first. + If the cache is invalid or doesn't exist, it fetches the data from the + backend API. + + Returns: + Tuple[Function, Dict]: A tuple containing: + - Function: Dynamically created Function enum class + - Dict: Dictionary mapping function IDs to their input/output specifications + + Raises: + Exception: If functions cannot be loaded due to invalid API key or other errors. + """ api_key = config.TEAM_API_KEY backend_url = config.BACKEND_URL @@ -93,6 +133,24 @@ def load_functions(): cache.add_list(function_objects) class Function(str, Enum): + """Dynamic Function Enum class that represents available aiXplain functions. + + This class is dynamically created based on the available function definitions + from the aiXplain backend. Each enum value represents a function and provides + methods to access its parameters and specifications. + + The class inherits from both str and Enum to allow string operations while + maintaining enum functionality. 
+ + Attributes: + _value_ (str): The underlying string value (function ID). + _parameters (Optional[FunctionParameters]): Cached parameters object for the function. + + Methods: + get_input_output_params(): Get dictionaries of input and output parameter specifications. + get_parameters(): Get or create a FunctionParameters object for parameter management. + """ + def __new__(cls, value): obj = str.__new__(cls, value) obj._value_ = value diff --git a/aixplain/enums/function_type.py b/aixplain/enums/function_type.py index f09d87e2..1f9474bb 100644 --- a/aixplain/enums/function_type.py +++ b/aixplain/enums/function_type.py @@ -25,6 +25,24 @@ class FunctionType(Enum): + """Enumeration of function types in the aiXplain system. + + This enum defines the different types of functions and services available + in the system, including AI models, data processing utilities, and + integration components. + + Attributes: + AI (str): Artificial Intelligence function type. + SEGMENTOR (str): Data segmentation function type. + RECONSTRUCTOR (str): Data reconstruction function type. + UTILITY (str): Utility function type. + METRIC (str): Metric/evaluation function type. + SEARCH (str): Search function type. + INTEGRATION (str): Integration connector function type. + CONNECTION (str): Connection function type. + MCP_CONNECTION (str): MCP connection function type. + MCPSERVER (str): MCP server function type. + """ AI = "ai" SEGMENTOR = "segmentor" RECONSTRUCTOR = "reconstructor" diff --git a/aixplain/enums/index_stores.py b/aixplain/enums/index_stores.py index 7cdfabb3..639a1b5d 100644 --- a/aixplain/enums/index_stores.py +++ b/aixplain/enums/index_stores.py @@ -2,13 +2,34 @@ class IndexStores(Enum): + """Enumeration of available index store providers in the aiXplain system. + + This enum defines the different index store providers that can be used for + storing and retrieving indexed data, along with their identifiers. 
+ + Attributes: + AIR (dict): AIR index store configuration with name and ID. + VECTARA (dict): Vectara index store configuration with name and ID. + GRAPHRAG (dict): GraphRAG index store configuration with name and ID. + ZERO_ENTROPY (dict): Zero Entropy index store configuration with name and ID. + """ AIR = {"name": "air", "id": "66eae6656eb56311f2595011"} VECTARA = {"name": "vectara", "id": "655e20f46eb563062a1aa301"} GRAPHRAG = {"name": "graphrag", "id": "67dd6d487cbf0a57cf4b72f3"} ZERO_ENTROPY = {"name": "zeroentropy", "id": "6807949168e47e7844c1f0c5"} - def __str__(self): + def __str__(self) -> str: + """Return the name of the index store. + + Returns: + str: The name value from the index store configuration. + """ return self.value["name"] - def get_model_id(self): + def get_model_id(self) -> str: + """Return the model ID of the index store. + + Returns: + str: The ID value from the index store configuration. + """ return self.value["id"] diff --git a/aixplain/enums/language.py b/aixplain/enums/language.py index a660024a..27a58f8d 100644 --- a/aixplain/enums/language.py +++ b/aixplain/enums/language.py @@ -32,6 +32,18 @@ @dataclass class LanguageMetadata: + """Metadata container for language information. + + This class holds metadata about a language including its identifier, value, + label, dialects, and supported scripts. + + Attributes: + id (str): ID of the language. + value (str): Language code or value. + label (str): Label for the language. + dialects (List[Dict[str, str]]): List of dialect specifications. + scripts (List[Any]): List of supported scripts for the language. + """ id: str value: str label: str @@ -39,6 +51,11 @@ class LanguageMetadata: scripts: List[Any] = field(default_factory=list) def to_dict(self) -> dict: + """Convert the language metadata to a dictionary. + + Returns: + dict: Dictionary representation of the language metadata. 
+ """ return { "id": self.id, "value": self.value, @@ -48,7 +65,15 @@ def to_dict(self) -> dict: } @classmethod - def from_dict(cls, data: dict): + def from_dict(cls, data: dict) -> "LanguageMetadata": + """Create a LanguageMetadata instance from a dictionary. + + Args: + data (dict): Dictionary containing language metadata. + + Returns: + LanguageMetadata: New instance created from the dictionary data. + """ return cls( id=data.get("id"), value=data.get("value"), @@ -57,7 +82,20 @@ def from_dict(cls, data: dict): scripts=data.get("scripts", []), ) -def load_languages(): +def load_languages() -> Enum: + """Load language definitions from the backend or cache. + + This function attempts to load language definitions from the cache first. + If the cache is invalid or doesn't exist, it fetches the data from the + backend API. It creates a dynamic Enum class containing all available + languages and their dialects. + + Returns: + Enum: Dynamically created Language enum class with language codes and dialects. + + Raises: + Exception: If languages cannot be loaded due to invalid API key or other errors. + """ api_key = config.TEAM_API_KEY backend_url = config.BACKEND_URL diff --git a/aixplain/enums/license.py b/aixplain/enums/license.py index da1623c4..3cd7cf2b 100644 --- a/aixplain/enums/license.py +++ b/aixplain/enums/license.py @@ -32,6 +32,18 @@ @dataclass class LicenseMetadata: + """Metadata container for license information. + + This class holds metadata about a license including its identifier, name, + description, URL, and custom URL settings. + + Attributes: + id (str): ID of the license. + name (str): Name of the license. + description (str): Description of the license terms. + url (str): URL to the license text or details. + allowCustomUrl (bool): Whether custom URLs are allowed for this license. 
+ """ id: str name: str description: str @@ -39,6 +51,11 @@ class LicenseMetadata: allowCustomUrl: bool def to_dict(self) -> dict: + """Convert the license metadata to a dictionary. + + Returns: + dict: Dictionary representation of the license metadata. + """ return { "id": self.id, "name": self.name, @@ -48,7 +65,15 @@ def to_dict(self) -> dict: } @classmethod - def from_dict(cls, data: dict): + def from_dict(cls, data: dict) -> "LicenseMetadata": + """Create a LicenseMetadata instance from a dictionary. + + Args: + data (dict): Dictionary containing license metadata. + + Returns: + LicenseMetadata: New instance created from the dictionary data. + """ return cls( id=data.get("id"), name=data.get("name"), @@ -58,8 +83,20 @@ def from_dict(cls, data: dict): ) -def load_licenses(): +def load_licenses() -> Enum: + """Load license definitions from the backend or cache. + + This function attempts to load license definitions from the cache first. + If the cache is invalid or doesn't exist, it fetches the data from the + backend API. It creates a dynamic Enum class containing all available + licenses. + + Returns: + Enum: Dynamically created License enum class with license identifiers. + Raises: + Exception: If licenses cannot be loaded due to invalid API key or other errors. + """ try: api_key = config.TEAM_API_KEY backend_url = config.BACKEND_URL diff --git a/aixplain/enums/onboard_status.py b/aixplain/enums/onboard_status.py index 2efb7755..4881f58c 100644 --- a/aixplain/enums/onboard_status.py +++ b/aixplain/enums/onboard_status.py @@ -25,6 +25,17 @@ class OnboardStatus(Enum): + """Enumeration of possible onboarding status values. + + This enum defines all possible states that an onboarding process can be in, + from initial onboarding to completed or failed states. + + Attributes: + ONBOARDING (str): Initial onboarding state. + ONBOARDED (str): Successful onboarding state. + FAILED (str): Failed onboarding state. + DELETED (str): Deleted onboarding state. 
+ """ ONBOARDING = "onboarding" ONBOARDED = "onboarded" FAILED = "failed" diff --git a/aixplain/enums/ownership_type.py b/aixplain/enums/ownership_type.py index 951a9f85..1c5dabbe 100644 --- a/aixplain/enums/ownership_type.py +++ b/aixplain/enums/ownership_type.py @@ -25,8 +25,22 @@ class OwnershipType(Enum): + """Enumeration of possible ownership types. + + This enum defines the different types of ownership that can be associated with + an asset or resource, including subscribed and owned ownership. + + Attributes: + SUBSCRIBED (str): Subscribed ownership type. + OWNED (str): Owned ownership type. + """ SUBSCRIBED = "SUBSCRIBED" OWNED = "OWNED" def __str__(self): + """Return the string representation of the ownership type. + + Returns: + str: The ownership type value as a string. + """ return self._value_ diff --git a/aixplain/enums/privacy.py b/aixplain/enums/privacy.py index 8f317d6f..35f87c9c 100644 --- a/aixplain/enums/privacy.py +++ b/aixplain/enums/privacy.py @@ -25,6 +25,16 @@ class Privacy(Enum): + """Enumeration of possible privacy levels. + + This enum defines the different levels of privacy that can be associated with + an asset or resource, including public, private, and restricted privacy levels. + + Attributes: + PUBLIC (str): Public privacy level. + PRIVATE (str): Private privacy level. + RESTRICTED (str): Restricted privacy level. + """ PUBLIC = "Public" PRIVATE = "Private" RESTRICTED = "Restricted" diff --git a/aixplain/enums/response_status.py b/aixplain/enums/response_status.py index 257b9427..46b78f15 100644 --- a/aixplain/enums/response_status.py +++ b/aixplain/enums/response_status.py @@ -26,9 +26,24 @@ class ResponseStatus(Text, Enum): + """Enumeration of possible response status values. + + This enum defines the different statuses that a response can be in, including + in progress, success, and failure. + + Attributes: + IN_PROGRESS (str): Response is in progress. + SUCCESS (str): Response was successful. + FAILED (str): Response failed. 
+ """ IN_PROGRESS = "IN_PROGRESS" SUCCESS = "SUCCESS" FAILED = "FAILED" def __str__(self): + """Return the string representation of the response status. + + Returns: + str: The response status value as a string. + """ return self.value diff --git a/aixplain/enums/sort_by.py b/aixplain/enums/sort_by.py index 5e3acec8..b8ecfd3b 100644 --- a/aixplain/enums/sort_by.py +++ b/aixplain/enums/sort_by.py @@ -25,6 +25,16 @@ class SortBy(Enum): + """Enumeration of possible sorting criteria. + + This enum defines the different criteria that can be used to sort assets, + including creation date, price, and popularity. + + Attributes: + CREATION_DATE (str): Sort by creation date. + PRICE (str): Sort by normalized price. + POPULARITY (str): Sort by total number of subscriptions. + """ CREATION_DATE = "createdAt" PRICE = "normalizedPrice" POPULARITY = "totalSubscribed" diff --git a/aixplain/enums/sort_order.py b/aixplain/enums/sort_order.py index 85e10f7a..393be4a6 100644 --- a/aixplain/enums/sort_order.py +++ b/aixplain/enums/sort_order.py @@ -25,5 +25,14 @@ class SortOrder(Enum): + """Enumeration of possible sorting orders. + + This enum defines the different directions that can be used to sort assets, + including ascending and descending order. + + Attributes: + ASCENDING (int): Sort in ascending order. + DESCENDING (int): Sort in descending order. + """ ASCENDING = 1 DESCENDING = -1 diff --git a/aixplain/enums/splitting_options.py b/aixplain/enums/splitting_options.py index 51dcaab3..919ebb0e 100644 --- a/aixplain/enums/splitting_options.py +++ b/aixplain/enums/splitting_options.py @@ -25,6 +25,18 @@ class SplittingOptions(str, Enum): + """Enumeration of possible splitting options. + + This enum defines the different ways that text can be split into chunks, + including by word, sentence, passage, page, and line. + + Attributes: + WORD (str): Split by word. + SENTENCE (str): Split by sentence. + PASSAGE (str): Split by passage. + PAGE (str): Split by page. 
+ LINE (str): Split by line. + """ WORD = "word" SENTENCE = "sentence" PASSAGE = "passage" @@ -32,4 +44,9 @@ class SplittingOptions(str, Enum): LINE = "line" def __str__(self): + """Return the string representation of the splitting option. + + Returns: + str: The splitting option value as a string. + """ return self._value_ diff --git a/aixplain/enums/status.py b/aixplain/enums/status.py index 3c84f2be..f16a8465 100644 --- a/aixplain/enums/status.py +++ b/aixplain/enums/status.py @@ -3,6 +3,16 @@ class Status(Text, Enum): + """Enumeration of possible status values. + + This enum defines the different statuses that a task or operation can be in, + including failed, in progress, and success. + + Attributes: + FAILED (str): Task failed. + IN_PROGRESS (str): Task is in progress. + SUCCESS (str): Task was successful. + """ FAILED = "failed" IN_PROGRESS = "in_progress" SUCCESS = "success" diff --git a/aixplain/enums/storage_type.py b/aixplain/enums/storage_type.py index 672d0c67..da6f7607 100644 --- a/aixplain/enums/storage_type.py +++ b/aixplain/enums/storage_type.py @@ -25,9 +25,24 @@ class StorageType(Enum): + """Enumeration of possible storage types. + + This enum defines the different types of storage that can be used to store + assets, including text, URL, and file. + + Attributes: + TEXT (str): Text storage type. + URL (str): URL storage type. + FILE (str): File storage type. + """ TEXT = "text" URL = "url" FILE = "file" def __str__(self): + """Return the string representation of the storage type. + + Returns: + str: The storage type value as a string. + """ return self._value_ diff --git a/aixplain/enums/supplier.py b/aixplain/enums/supplier.py index 18a3e81d..917d6942 100644 --- a/aixplain/enums/supplier.py +++ b/aixplain/enums/supplier.py @@ -30,14 +30,38 @@ import re -def clean_name(name): +def clean_name(name: str) -> str: + """Clean a supplier name by replacing spaces and special characters with underscores. 
+ + This function takes a supplier name and performs the following transformations: + 1. Replaces spaces and hyphens with underscores. + 2. Removes any non-alphanumeric characters. + 3. Removes any leading numbers and converts the result to uppercase. + + Args: + name (str): The supplier name to clean. + + Returns: + str: The cleaned supplier name in uppercase. + """ cleaned_name = re.sub(r"[ -]+", "_", name) cleaned_name = re.sub(r"[^a-zA-Z0-9_]", "", cleaned_name) cleaned_name = re.sub(r"^\d+", "", cleaned_name) return cleaned_name.upper() -def load_suppliers(): +def load_suppliers() -> Enum: + """Load suppliers from the backend or cache. + + This function fetches supplier information from the backend API and creates + an Enum class with supplier names as keys. + + Returns: + Enum: An Enum class with supplier names as keys. + + Raises: + Exception: If suppliers cannot be loaded due to invalid API key or other errors. + """ api_key = config.TEAM_API_KEY backend_url = config.BACKEND_URL diff --git a/aixplain/exceptions/types.py b/aixplain/exceptions/types.py index 56c710b5..5162662f 100644 --- a/aixplain/exceptions/types.py +++ b/aixplain/exceptions/types.py @@ -3,7 +3,17 @@ class ErrorSeverity(str, Enum): - """Severity levels for errors.""" + """Enumeration of error severity levels in the aiXplain system. + + This enum defines the different levels of severity that can be assigned to + errors, ranging from informational messages to critical system errors. + + Attributes: + INFO (str): Informational message, not an actual error. + WARNING (str): Warning that doesn't prevent operation completion. + ERROR (str): Error condition that prevents operation completion. + CRITICAL (str): Severe error that might affect system stability. + """ INFO = "info" # Informational, not an error WARNING = "warning" # Warning, operation can continue @@ -12,7 +22,23 @@ class ErrorSeverity(str, Enum): class ErrorCategory(Enum): - """Categorizes errors by their domain.""" + """Enumeration of error categories in the aiXplain system.
+ + This enum defines the different domains or areas where errors can occur, + helping to classify and organize error handling. + + Attributes: + AUTHENTICATION (str): Authentication and authorization errors. + VALIDATION (str): Input validation errors. + RESOURCE (str): Resource availability and access errors. + BILLING (str): Billing and payment-related errors. + SUPPLIER (str): External supplier and third-party service errors. + NETWORK (str): Network connectivity errors. + SERVICE (str): Service availability errors. + INTERNAL (str): Internal system errors. + AGENT (str): Agent-specific errors. + UNKNOWN (str): Uncategorized or unclassified errors. + """ AUTHENTICATION = "authentication" # API keys, permissions VALIDATION = "validation" # Input validation @@ -59,12 +85,31 @@ class inheriting from the corresponding category exception (e.g., AX_SVC_ERROR = "AX-SVC-1000" # General service error. Use when a specific aiXplain service or endpoint is unavailable or malfunctioning (e.g., service downtime, internal component failure. AX_INT_ERROR = "AX-INT-1000" # General internal error. Use for unexpected server-side errors that are not covered by other categories. This often indicates a bug or an issue within the aiXplain platform itself. - def __str__(self): + def __str__(self) -> str: + """Return the string representation of the error code. + + Returns: + str: The error code value as a string. + """ return self.value class AixplainBaseException(Exception): - """Base exception class for all aiXplain exceptions.""" + """Base exception class for all aiXplain exceptions. + + This class serves as the foundation for all custom exceptions in the aiXplain + system. It provides structured error information including categorization, + severity, and additional context. + + Attributes: + message (str): Error message. + category (ErrorCategory): Category of the error. + severity (ErrorSeverity): Severity level of the error. 
+ status_code (Optional[int]): HTTP status code if applicable. + details (Dict[str, Any]): Additional error context and details. + retry_recommended (bool): Whether retrying the operation might succeed. + error_code (Optional[ErrorCode]): Standardized error code. + """ def __init__( self, @@ -85,12 +130,24 @@ def __init__( self.error_code = error_code super().__init__(self.message) - def __str__(self): + def __str__(self) -> str: + """Return a string representation of the exception. + + Returns: + str: Formatted string containing the exception class name, + error code (if present), and error message. + """ error_code_str = f" [{self.error_code}]" if self.error_code else "" return f"{self.__class__.__name__}{error_code_str}: {self.message}" def to_dict(self) -> Dict[str, Any]: - """Convert exception to dictionary for serialization.""" + """Convert the exception to a dictionary for serialization. + + Returns: + Dict[str, Any]: Dictionary containing all exception attributes + including message, category, severity, status code, details, + retry recommendation, and error code. + """ return { "message": self.message, "category": self.category.value, diff --git a/aixplain/factories/agent_factory/__init__.py b/aixplain/factories/agent_factory/__init__.py index b06dfaf2..bf3b41ff 100644 --- a/aixplain/factories/agent_factory/__init__.py +++ b/aixplain/factories/agent_factory/__init__.py @@ -48,6 +48,11 @@ class AgentFactory: + """Factory class for creating and managing agents in the aiXplain system. + + This class provides class methods for creating various types of agents and tools, + as well as managing existing agents in the platform. + """ @classmethod def create( cls, @@ -165,7 +170,17 @@ def create( @classmethod def create_from_dict(cls, dict: Dict) -> Agent: - """Create an agent from a dictionary.""" + """Create an agent instance from a dictionary representation. + + Args: + dict (Dict): Dictionary containing agent configuration and properties. 
+ + Returns: + Agent: Instantiated agent object with properties from the dictionary. + + Raises: + Exception: If agent validation fails or required properties are missing. + """ agent = Agent.from_dict(dict) agent.validate(raise_exception=True) agent.url = urljoin(config.BACKEND_URL, f"sdk/agents/{agent.id}/run") @@ -175,6 +190,18 @@ def create_from_dict(cls, dict: Dict) -> Agent: def create_task( cls, name: Text, description: Text, expected_output: Text, dependencies: Optional[List[Text]] = None ) -> AgentTask: + """Create a new task for an agent. + + Args: + name (Text): Name of the task. + description (Text): Description of what the task should accomplish. + expected_output (Text): Description of the expected output format. + dependencies (Optional[List[Text]], optional): List of task names that must + complete before this task can start. Defaults to None. + + Returns: + AgentTask: Created task object. + """ return AgentTask(name=name, description=description, expected_output=expected_output, dependencies=dependencies) @classmethod @@ -187,7 +214,22 @@ def create_model_tool( parameters: Optional[Dict] = None, name: Optional[Text] = None, ) -> ModelTool: - """Create a new model tool.""" + """Create a new model tool for use with an agent. + + Args: + model (Optional[Union[Model, Text]], optional): Model instance or ID. Defaults to None. + function (Optional[Union[Function, Text]], optional): Function enum or ID. Defaults to None. + supplier (Optional[Union[Supplier, Text]], optional): Supplier enum or name. Defaults to None. + description (Text, optional): Description of the tool. Defaults to "". + parameters (Optional[Dict], optional): Tool parameters. Defaults to None. + name (Optional[Text], optional): Name of the tool. Defaults to None. + + Returns: + ModelTool: Created model tool object. + + Raises: + AssertionError: If the supplier is not valid. 
+ """ if function is not None and isinstance(function, str): function = Function(function) @@ -206,19 +248,43 @@ def create_model_tool( def create_pipeline_tool( cls, description: Text, pipeline: Union[Pipeline, Text], name: Optional[Text] = None ) -> PipelineTool: - """Create a new pipeline tool.""" + """Create a new pipeline tool for use with an agent. + + Args: + description (Text): Description of what the pipeline tool does. + pipeline (Union[Pipeline, Text]): Pipeline instance or pipeline ID. + name (Optional[Text], optional): Name of the tool. Defaults to None. + + Returns: + PipelineTool: Created pipeline tool object. + """ return PipelineTool(description=description, pipeline=pipeline, name=name) @classmethod def create_python_interpreter_tool(cls) -> PythonInterpreterTool: - """Create a new python interpreter tool.""" + """Create a new Python interpreter tool for use with an agent. + + This tool allows the agent to execute Python code in a controlled environment. + + Returns: + PythonInterpreterTool: Created Python interpreter tool object. + """ return PythonInterpreterTool() @classmethod def create_custom_python_code_tool( cls, code: Union[Text, Callable], name: Text, description: Text = "" ) -> CustomPythonCodeTool: - """Create a new custom python code tool.""" + """Create a new custom Python code tool for use with an agent. + + Args: + code (Union[Text, Callable]): Python code as string or callable function. + name (Text): Name of the tool. + description (Text, optional): Description of what the tool does. Defaults to "". + + Returns: + CustomPythonCodeTool: Created custom Python code tool object. + """ return CustomPythonCodeTool(name=name, description=description, code=code) @classmethod @@ -356,7 +422,18 @@ def create_sql_tool( @classmethod def list(cls) -> Dict: - """List all agents available in the platform.""" + """List all agents available in the platform. 
+ + Returns: + Dict: Dictionary containing: + - results (List[Agent]): List of available agents. + - page_total (int): Number of agents in current page. + - page_number (int): Current page number. + - total (int): Total number of agents. + + Raises: + Exception: If there is an error listing the agents. + """ from aixplain.factories.agent_factory.utils import build_agent url = urljoin(config.BACKEND_URL, "sdk/agents") @@ -390,7 +467,19 @@ def list(cls) -> Dict: @classmethod def get(cls, agent_id: Text, api_key: Optional[Text] = None) -> Agent: - """Get agent by id.""" + """Retrieve an agent by its ID. + + Args: + agent_id (Text): ID of the agent to retrieve. + api_key (Optional[Text], optional): API key for authentication. + Defaults to None, using the configured TEAM_API_KEY. + + Returns: + Agent: Retrieved agent object. + + Raises: + Exception: If the agent cannot be retrieved or doesn't exist. + """ from aixplain.factories.agent_factory.utils import build_agent url = urljoin(config.BACKEND_URL, f"sdk/agents/{agent_id}") diff --git a/aixplain/factories/agent_factory/utils.py b/aixplain/factories/agent_factory/utils.py index 450da273..ef895626 100644 --- a/aixplain/factories/agent_factory/utils.py +++ b/aixplain/factories/agent_factory/utils.py @@ -116,7 +116,19 @@ def build_tool(tool: Dict): def build_llm(payload: Dict, api_key: Text = config.TEAM_API_KEY) -> LLM: - """Build a LLM from a dictionary.""" + """Build a Large Language Model (LLM) instance from a dictionary configuration. + + This function attempts to create an LLM instance either from a cached LLM object + in the payload or by creating a new instance using the provided configuration. + + Args: + payload (Dict): Dictionary containing LLM configuration and possibly a cached + LLM object. + api_key (Text, optional): API key for authentication. Defaults to config.TEAM_API_KEY. + + Returns: + LLM: Instantiated LLM object with configured parameters. 
+ """ # Get LLM from tools if present llm = None # First check if we have the LLM object @@ -152,7 +164,25 @@ def build_llm(payload: Dict, api_key: Text = config.TEAM_API_KEY) -> LLM: def build_agent(payload: Dict, tools: List[Tool] = None, api_key: Text = config.TEAM_API_KEY) -> Agent: - """Instantiate a new agent in the platform.""" + """Build an agent instance from a dictionary configuration. + + This function creates an agent with its associated tools, LLM, and tasks based + on the provided configuration. + + Args: + payload (Dict): Dictionary containing agent configuration including tools, + LLM settings, and tasks. + tools (List[Tool], optional): List of pre-configured tools to use. If None, + tools will be built from the payload. Defaults to None. + api_key (Text, optional): API key for authentication. Defaults to config.TEAM_API_KEY. + + Returns: + Agent: Instantiated agent object with configured tools, LLM, and tasks. + + Raises: + ValueError: If a tool type is not supported. + AssertionError: If tool configuration is invalid. + """ tools_dict = payload["assets"] payload_tools = tools if payload_tools is None: diff --git a/aixplain/factories/api_key_factory.py b/aixplain/factories/api_key_factory.py index 3d081e27..5251ac65 100644 --- a/aixplain/factories/api_key_factory.py +++ b/aixplain/factories/api_key_factory.py @@ -8,11 +8,32 @@ class APIKeyFactory: + """Factory class for managing API keys in the aiXplain platform. + + This class provides functionality for creating, retrieving, updating, and + monitoring API keys, including their usage limits and budgets. + + Attributes: + backend_url (str): Base URL for the aiXplain backend API. + """ backend_url = config.BACKEND_URL @classmethod def get(cls, api_key: Text) -> APIKey: - """Get an API key""" + """Retrieve an API key by its value. + + This method searches for an API key by matching the first and last 4 + characters of the provided key. + + Args: + api_key (Text): The API key value to search for. 
+ + Returns: + APIKey: The matching API key object. + + Raises: + Exception: If no matching API key is found. + """ for api_key_obj in cls.list(): if str(api_key_obj.access_key).startswith(api_key[:4]) and str(api_key_obj.access_key).endswith(api_key[-4:]): return api_key_obj @@ -20,7 +41,18 @@ def get(cls, api_key: Text) -> APIKey: @classmethod def list(cls) -> List[APIKey]: - """List all API keys""" + """List all API keys accessible to the current user. + + This method retrieves all API keys that the authenticated user has access to, + using the configured TEAM_API_KEY. + + Returns: + List[APIKey]: List of API key objects. + + Raises: + Exception: If the API request fails or returns an error, including cases + where authentication fails or the service is unavailable. + """ resp = "Unspecified error" try: url = f"{cls.backend_url}/sdk/api-keys" @@ -58,7 +90,27 @@ def create( asset_limits: List[Union[Dict, APIKeyLimits]], expires_at: datetime, ) -> APIKey: - """Create a new API key""" + """Create a new API key with specified limits and budget. + + This method creates a new API key with configured usage limits, budget, + and expiration date. + + Args: + name (Text): Name or description for the API key. + budget (int): Total budget allocated to this API key. + global_limits (Union[Dict, APIKeyLimits]): Global usage limits for the key, + either as a dictionary or APIKeyLimits object. + asset_limits (List[Union[Dict, APIKeyLimits]]): List of per-asset usage + limits, each either as a dictionary or APIKeyLimits object. + expires_at (datetime): Expiration date and time for the API key. + + Returns: + APIKey: Created API key object with its access key and configuration. + + Raises: + Exception: If the API request fails or returns an error, including cases + where validation fails or the service is unavailable. 
+ """ resp = "Unspecified error" url = f"{cls.backend_url}/sdk/api-keys" headers = {"Content-Type": "application/json", "Authorization": f"Token {config.TEAM_API_KEY}"} @@ -91,7 +143,25 @@ def create( @classmethod def update(cls, api_key: APIKey) -> APIKey: - """Update an existing API key""" + """Update an existing API key's configuration. + + This method updates an API key's settings such as limits, budget, and + expiration date. The API key must be validated before update. + + Args: + api_key (APIKey): API key object with updated configuration. + Must have a valid ID of an existing key. + + Returns: + APIKey: Updated API key object with new configuration. + + Raises: + Exception: If: + - API key validation fails + - API key ID is invalid + - Update request fails + - Service is unavailable + """ api_key.validate() try: resp = "Unspecified error" @@ -122,7 +192,31 @@ def update(cls, api_key: APIKey) -> APIKey: @classmethod def get_usage_limits(cls, api_key: Text = config.TEAM_API_KEY, asset_id: Optional[Text] = None) -> List[APIKeyUsageLimit]: - """Get API key usage limits""" + """Retrieve current usage limits and counts for an API key. + + This method fetches the current usage statistics and limits for an API key, + optionally filtered by a specific asset. + + Args: + api_key (Text, optional): API key to check usage for. Defaults to + config.TEAM_API_KEY. + asset_id (Optional[Text], optional): Filter usage limits for a specific + asset. Defaults to None, showing all assets. 
+ + Returns: + List[APIKeyUsageLimit]: List of usage limit objects containing: + - daily_request_count: Current number of requests today + - daily_request_limit: Maximum allowed requests per day + - daily_token_count: Current number of tokens used today + - daily_token_limit: Maximum allowed tokens per day + - model: Asset ID if limit is asset-specific, None if global + + Raises: + Exception: If: + - API key is invalid + - User is not the key owner + - Service is unavailable + """ try: url = f"{config.BACKEND_URL}/sdk/api-keys/usage-limits" headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"} diff --git a/aixplain/factories/asset_factory.py b/aixplain/factories/asset_factory.py index 56d938db..a4d1c429 100644 --- a/aixplain/factories/asset_factory.py +++ b/aixplain/factories/asset_factory.py @@ -28,9 +28,17 @@ class AssetFactory: + """Base class for asset factories. - backend_url = config.BACKEND_URL + This class provides a common interface for creating and retrieving assets + from the aiXplain platform. Subclasses should implement the abstract methods + to define specific asset types. + + Attributes: + backend_url (str): Base URL for the aiXplain backend API. + """ + backend_url = config.BACKEND_URL @abstractmethod def get(self, asset_id: Text) -> Asset: """Create a 'Asset' object from id diff --git a/aixplain/factories/benchmark_factory.py b/aixplain/factories/benchmark_factory.py index 1c4408ea..87f60911 100644 --- a/aixplain/factories/benchmark_factory.py +++ b/aixplain/factories/benchmark_factory.py @@ -37,35 +37,43 @@ class BenchmarkFactory: - """A static class for creating and managing the Benchmarking experience. + """Factory class for creating and managing benchmarks in the aiXplain platform. + + This class provides functionality for creating benchmarks, managing benchmark jobs, + retrieving results, and configuring normalization options. 
Benchmarks can be used + to evaluate and compare multiple models using specified datasets and metrics. Attributes: - backend_url (str): The URL for the backend. + backend_url (str): Base URL for the aiXplain backend API. """ backend_url = config.BACKEND_URL @classmethod def _create_benchmark_job_from_response(cls, response: Dict) -> BenchmarkJob: - """Converts response Json to 'BenchmarkJob' object + """Convert API response into a BenchmarkJob object. Args: - response (Dict): Json from API + response (Dict): API response containing: + - jobId: Unique job identifier + - status: Current job status + - benchmark: Dictionary containing benchmark information Returns: - BenchmarkJob: Coverted 'BenchmarkJob' object + BenchmarkJob: Instantiated benchmark job object. """ return BenchmarkJob(response["jobId"], response["status"], response["benchmark"]["id"]) @classmethod def _get_benchmark_jobs_from_benchmark_id(cls, benchmark_id: Text) -> List[BenchmarkJob]: - """Get list of benchmark jobs from benchmark id + """Retrieve all jobs associated with a benchmark. Args: - benchmark_id (Text): ID of benchmark + benchmark_id (Text): Unique identifier of the benchmark. Returns: - List[BenchmarkJob]: List of associated benchmark jobs + List[BenchmarkJob]: List of benchmark job objects associated with + the specified benchmark. """ url = urljoin(cls.backend_url, f"sdk/benchmarks/{benchmark_id}/jobs") @@ -77,13 +85,21 @@ def _get_benchmark_jobs_from_benchmark_id(cls, benchmark_id: Text) -> List[Bench @classmethod def _create_benchmark_from_response(cls, response: Dict) -> Benchmark: - """Converts response Json to 'Benchmark' object + """Convert API response into a Benchmark object. + + This method creates a Benchmark object by fetching and instantiating all + associated models, datasets, metrics, and jobs. 
Args: - response (Dict): Json from API + response (Dict): API response containing: + - id: Benchmark identifier + - name: Benchmark name + - model: List of model configurations + - datasets: List of dataset IDs + - metrics: List of metric configurations Returns: - Benchmark: Coverted 'Benchmark' object + Benchmark: Instantiated benchmark object with all components loaded. """ model_list = [ModelFactory().get(model_info["id"]) for model_info in response["model"]] dataset_list = [DatasetFactory().get(dataset_id) for dataset_id in response["datasets"]] @@ -93,13 +109,22 @@ def _create_benchmark_from_response(cls, response: Dict) -> Benchmark: @classmethod def get(cls, benchmark_id: str) -> Benchmark: - """Create a 'Benchmark' object from Benchmark id + """Retrieve a benchmark by its ID. + + This method fetches a benchmark and all its associated components + (models, datasets, metrics, jobs) from the platform. Args: - benchmark_id (Text): Benchmark ID of required Benchmark. + benchmark_id (str): Unique identifier of the benchmark to retrieve. Returns: - Benchmark: Created 'Benchmark' object + Benchmark: Retrieved benchmark object with all components loaded. + + Raises: + Exception: If: + - Benchmark ID is invalid + - Authentication fails + - Service is unavailable """ resp = None try: @@ -130,13 +155,16 @@ def get(cls, benchmark_id: str) -> Benchmark: @classmethod def get_job(cls, job_id: Text) -> BenchmarkJob: - """Create a 'BenchmarkJob' object from job id + """Retrieve a benchmark job by its ID. Args: - job_id (Text): ID of the required BenchmarkJob. + job_id (Text): Unique identifier of the benchmark job to retrieve. Returns: - BenchmarkJob: Created 'BenchmarkJob' object + BenchmarkJob: Retrieved benchmark job object with its current status. + + Raises: + Exception: If the job ID is invalid or the request fails. 
""" url = urljoin(cls.backend_url, f"sdk/benchmarks/jobs/{job_id}") headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} @@ -170,14 +198,22 @@ def _validate_create_benchmark_payload(cls, payload): @classmethod def _reformat_model_list(cls, model_list: List[Model]) -> Tuple[List[Any], List[Any]]: - """Reformat the model list to be used in the create benchmark API + """Reformat a list of models for the benchmark creation API. + + This method separates models into two lists based on whether they have + additional configuration information. Args: - model_list (List[Model]): List of models to be used in the benchmark + model_list (List[Model]): List of models to be used in the benchmark. Returns: - Tuple[List[Any], List[Any]]: Reformatted model lists + Tuple[List[Any], List[Any]]: A tuple containing: + - List of model IDs for models without additional parameters + - List of model configurations for models with parameters, or None + if no models have parameters + Raises: + Exception: If some models have additional info and others don't. """ model_list_without_parms, model_list_with_parms = [], [] for model in model_list: @@ -195,17 +231,32 @@ def _reformat_model_list(cls, model_list: List[Model]) -> Tuple[List[Any], List[ @classmethod def create(cls, name: str, dataset_list: List[Dataset], model_list: List[Model], metric_list: List[Metric]) -> Benchmark: - """Creates a benchmark based on the information provided like name, dataset list, model list and score list. - Note: This only creates a benchmark. It needs to run seperately using start_benchmark_job. + """Create a new benchmark configuration. + + This method creates a new benchmark that can be used to evaluate and compare + multiple models using specified datasets and metrics. Note that this only + creates the benchmark configuration - you need to run it separately using + start_benchmark_job. 
Args: - name (str): Unique Name of benchmark - dataset_list (List[Dataset]): List of Datasets to be used for benchmarking - model_list (List[Model]): List of Models to be used for benchmarking - metric_list (List[Metric]): List of Metrics to be used for benchmarking + name (str): Unique name for the benchmark. + dataset_list (List[Dataset]): List of datasets to use for evaluation. + Currently only supports a single dataset. + model_list (List[Model]): List of models to evaluate. All models must + either have additional configuration info or none should have it. + metric_list (List[Metric]): List of metrics to use for evaluation. + Must provide at least one metric. Returns: - Benchmark: _description_ + Benchmark: Created benchmark object ready for execution. + + Raises: + Exception: If: + - No dataset is provided or multiple datasets are provided + - No metrics are provided + - No models are provided + - Model configuration is inconsistent + - Request fails or returns an error """ payload = {} try: @@ -243,14 +294,23 @@ def create(cls, name: str, dataset_list: List[Dataset], model_list: List[Model], @classmethod def list_normalization_options(cls, metric: Metric, model: Model) -> List[str]: - """Get list of supported normalization options for a metric and model to be used in benchmarking + """List supported normalization options for a metric-model pair. + + This method retrieves the list of normalization options that can be used + when evaluating a specific model with a specific metric in a benchmark. Args: - metric (Metric): Metric for which normalization options are to be listed - model(Model): Model to be used in benchmarking + metric (Metric): Metric to get normalization options for. + model (Model): Model to check compatibility with. Returns: - List[str]: List of supported normalization options + List[str]: List of supported normalization option identifiers. 
+ + Raises: + Exception: If: + - Metric or model is invalid + - Request fails + - Service is unavailable """ try: url = urljoin(cls.backend_url, "sdk/benchmarks/normalization-options") @@ -274,7 +334,23 @@ def list_normalization_options(cls, metric: Metric, model: Model) -> List[str]: raise Exception(error_message) @classmethod - def get_benchmark_job_scores(cls, job_id): + def get_benchmark_job_scores(cls, job_id: Text) -> Any: + """Retrieve and format benchmark job scores. + + This method fetches the scores from a benchmark job and formats them into + a pandas DataFrame, with model names properly formatted to include supplier + and version information. + + Args: + job_id (Text): Unique identifier of the benchmark job. + + Returns: + pandas.DataFrame: DataFrame containing benchmark scores with formatted + model names. + + Raises: + Exception: If the job ID is invalid or the request fails. + """ def __get_model_name(model_id): model = ModelFactory.get(model_id) supplier = str(model.supplier) diff --git a/aixplain/factories/cli/model_factory_cli.py b/aixplain/factories/cli/model_factory_cli.py index 9c69ca4f..ce371394 100644 --- a/aixplain/factories/cli/model_factory_cli.py +++ b/aixplain/factories/cli/model_factory_cli.py @@ -30,14 +30,17 @@ @click.command("hosts") @click.option("--api-key", default=None, help="TEAM_API_KEY if not already set in environment") def list_host_machines(api_key: Optional[Text] = None) -> None: - """CLI wrapper function for the LIST_HOST_MACHINES function in - ModelFactory. + """List available host machines for model deployment. + + This CLI command wraps the ModelFactory.list_host_machines function and outputs + the results in YAML format. Args: - api_key (Text, optional): Team API key. Defaults to None. + api_key (Text, optional): Team API key for authentication. Defaults to None, + using the configured environment variable. Returns: - None + None: Prints the host machines list in YAML format to stdout. 
""" ret_val = ModelFactory.list_host_machines(api_key) ret_val_yaml = yaml.dump(ret_val) @@ -48,14 +51,20 @@ def list_host_machines(api_key: Optional[Text] = None) -> None: @click.option("--verbose", is_flag=True, help="List all function details, False by default.") @click.option("--api-key", default=None, help="TEAM_API_KEY if not already set in environment.") def list_functions(verbose: bool, api_key: Optional[Text] = None) -> None: - """CLI wrapper function for the LIST_FUNCTIONS function in ModelFactory. + """List available functions for model deployment. + + This CLI command wraps the ModelFactory.list_functions function and outputs + the results in YAML format. Functions represent the different types of + operations that models can perform. Args: - verbose (Boolean, optional): Set to True if a detailed response - is desired; is otherwise False by default. - api_key (Text, optional): Team API key. Defaults to None. + verbose (bool): If True, includes detailed information about each function. + If False, provides a simplified list. + api_key (Text, optional): Team API key for authentication. Defaults to None, + using the configured environment variable. + Returns: - None + None: Prints the functions list in YAML format to stdout. """ ret_val = ModelFactory.list_functions(verbose, api_key) ret_val_yaml = yaml.dump(ret_val) @@ -65,12 +74,18 @@ def list_functions(verbose: bool, api_key: Optional[Text] = None) -> None: @click.command("gpus") @click.option("--api-key", default=None, help="TEAM_API_KEY if not already set in environment.") def list_gpus(api_key: Optional[Text] = None) -> None: - """CLI wrapper function for the LIST_GPUS function in ModelFactory. + """List available GPUs for model deployment. + + This CLI command wraps the ModelFactory.list_gpus function and outputs + the results in YAML format. Shows available GPU resources that can be + used for model hosting. Args: - api_key (Text, optional): Team API key. Defaults to None. 
+ api_key (Text, optional): Team API key for authentication. Defaults to None, + using the configured environment variable. + Returns: - None + None: Prints the GPU list in YAML format to stdout. """ ret_val = ModelFactory.list_gpus(api_key) ret_val_yaml = yaml.dump(ret_val) @@ -98,21 +113,25 @@ def create_asset_repo( documentation_url: Optional[Text] = "", api_key: Optional[Text] = None, ) -> None: - """CLI wrapper function for the CREATE_ASSET_REPO function in ModelFactory. + """Create a new asset repository for a model. + + This CLI command wraps the ModelFactory.create_asset_repo function and outputs + the results in YAML format. Creates a new repository for storing model assets + and configurations. Args: - name (Text): Model name - hosting_machine (Text): Hosting machine ID obtained via list_host_machines - always_on (bool): Whether the model should always be on - version (Text): Model version - description (Text): Model description - function (Text): Model function name obtained via LIST_HOST_MACHINES - is_async (bool): Whether the model is asynchronous or not (False in first release) - source_language (Text): 2-character 639-1 code or 3-character 639-3 language code. - api_key (Text, optional): Team API key. Defaults to None. + name (Text): Name of the model. + description (Text): Description of the model's purpose and functionality. + function (Text): Model function name obtained via list_functions. + source_language (Text): Language code in ISO 639-1 (2-char) or 639-3 (3-char) format. + input_modality (Text): Type of input the model accepts (e.g., text, video, image). + output_modality (Text): Type of output the model produces (e.g., text, video, image). + documentation_url (Text, optional): URL to model documentation. Defaults to "". + api_key (Text, optional): Team API key for authentication. Defaults to None, + using the configured environment variable. Returns: - None + None: Prints the created repository details in YAML format to stdout. 
""" ret_val = ModelFactory.create_asset_repo( name, description, function, source_language, input_modality, output_modality, documentation_url, api_key @@ -124,13 +143,18 @@ def create_asset_repo( @click.command("image-repo-login") @click.option("--api-key", default=None, help="TEAM_API_KEY if not already set in environment.") def asset_repo_login(api_key: Optional[Text] = None) -> None: - """CLI wrapper function for the ASSET_REPO_LOGIN function in ModelFactory. + """Get login credentials for the asset repository. + + This CLI command wraps the ModelFactory.asset_repo_login function and outputs + the results in YAML format. Provides authentication details needed to access + the model asset repository. Args: - api_key (Text, optional): Team API key. Defaults to None. + api_key (Text, optional): Team API key for authentication. Defaults to None, + using the configured environment variable. Returns: - None + None: Prints the login credentials in YAML format to stdout. """ ret_val = ModelFactory.asset_repo_login(api_key) ret_val_yaml = yaml.dump(ret_val) @@ -146,15 +170,22 @@ def asset_repo_login(api_key: Optional[Text] = None) -> None: def onboard_model( model_id: Text, image_tag: Text, image_hash: Text, host_machine: Optional[Text] = "", api_key: Optional[Text] = None ) -> None: - """CLI wrapper function for the ONBOARD_MODEL function in ModelFactory. + """Onboard a model image for deployment. + + This CLI command wraps the ModelFactory.onboard_model function and outputs + the results in YAML format. Prepares a model image for deployment by registering + it with the platform. Args: - model_id (Text): Model ID obtained from CREATE_ASSET_REPO. - image_tag (Text): Image tag to be onboarded. - api_key (Text, optional): Team API key. Defaults to None. + model_id (Text): Model ID obtained from create_asset_repo. + image_tag (Text): Tag of the Docker image to be onboarded. + image_hash (Text): Hash of the Docker image for verification. 
+ host_machine (Text, optional): ID of the machine to host the model. Defaults to "". + api_key (Text, optional): Team API key for authentication. Defaults to None, + using the configured environment variable. Returns: - None + None: Prints the onboarding results in YAML format to stdout. """ ret_val = ModelFactory.onboard_model(model_id, image_tag, image_hash, host_machine, api_key) ret_val_yaml = yaml.dump(ret_val) @@ -174,14 +205,21 @@ def deploy_huggingface_model( revision: Optional[Text] = None, api_key: Optional[Text] = None, ) -> None: - """CLI wrapper function for the DEPLOY_HUGGINGFACE_MODEL function in ModelFactory. + """Deploy a model from Hugging Face Hub. + + This CLI command wraps the ModelFactory.deploy_huggingface_model function and outputs + the results in YAML format. Deploys a model directly from Hugging Face's model hub. Args: - name (Text): User-defined name for Hugging Face model. - api_key (Text, optional): Team API key. Defaults to None. + name (Text): User-defined name for the Hugging Face model. + hf_repo_id (Text): Repository ID from Hugging Face in 'org/model-name' format. + hf_token (Text, optional): Hugging Face token for private models. Defaults to None. + revision (Text, optional): Specific model revision/commit hash. Defaults to None. + api_key (Text, optional): Team API key for authentication. Defaults to None, + using the configured environment variable. Returns: - None + None: Prints the deployment results in YAML format to stdout. 
""" ret_val = ModelFactory.deploy_huggingface_model(name, hf_repo_id, revision, hf_token, api_key) ret_val_yaml = yaml.dump(ret_val) @@ -192,14 +230,19 @@ def deploy_huggingface_model( @click.option("--model-id", help="Model ID from DEPLOY_HUGGINGFACE_MODEL.") @click.option("--api-key", default=None, help="TEAM_API_KEY if not already set in environment.") def get_huggingface_model_status(model_id: Text, api_key: Optional[Text] = None) -> None: - """CLI wrapper function for the GET_HUGGINGFACE_MODEL_STATUS function in ModelFactory. + """Check the deployment status of a Hugging Face model. + + This CLI command wraps the ModelFactory.get_huggingface_model_status function and + outputs the results in YAML format. Retrieves the current status of a Hugging Face + model deployment. Args: - model_id (Text): Model ID obtained from DEPLOY_HUGGINGFACE_MODEL. - api_key (Text, optional): Team API key. Defaults to None. + model_id (Text): Model ID obtained from deploy_huggingface_model. + api_key (Text, optional): Team API key for authentication. Defaults to None, + using the configured environment variable. Returns: - None + None: Prints the model status in YAML format to stdout. """ ret_val = ModelFactory.get_huggingface_model_status(model_id, api_key) ret_val_yaml = yaml.dump(ret_val) diff --git a/aixplain/factories/corpus_factory.py b/aixplain/factories/corpus_factory.py index 9563ad14..9747eab1 100644 --- a/aixplain/factories/corpus_factory.py +++ b/aixplain/factories/corpus_factory.py @@ -48,17 +48,37 @@ class CorpusFactory(AssetFactory): + """Factory class for creating and managing corpora in the aiXplain platform. + + This class provides functionality for creating, retrieving, and managing + corpora, which are collections of data assets used for training and + evaluating AI models. + + Attributes: + backend_url (str): Base URL for the aiXplain backend API. 
+ """ backend_url = config.BACKEND_URL @classmethod def __from_response(cls, response: Dict) -> Corpus: - """Converts Json response to 'Corpus' object + """Convert API response into a Corpus object. + + This method creates a Corpus object from an API response, handling the + conversion of languages, data types, functions, and other attributes. Args: - response (dict): Json from API + response (Dict): API response containing: + - id: Corpus identifier + - name: Corpus name + - description: Corpus description + - data: List of data asset configurations + - suggestedFunction: List of function identifiers + - license: License configuration + - status: Onboarding status + - segmentsCount: Number of segments Returns: - Dataset: Coverted 'Dataset' object + Corpus: Instantiated corpus object with all components loaded. """ data = [] for d in response["data"]: @@ -106,13 +126,22 @@ def __from_response(cls, response: Dict) -> Corpus: @classmethod def get(cls, corpus_id: Text) -> Corpus: - """Create a 'Corpus' object from corpus id + """Retrieve a corpus by its ID. + + This method fetches a corpus and all its associated data assets from + the platform. Args: - corpus_id (Text): Corpus ID of required corpus. + corpus_id (Text): Unique identifier of the corpus to retrieve. Returns: - Corpus: Created 'Corpus' object + Corpus: Retrieved corpus object with all data assets loaded. + + Raises: + Exception: If: + - Corpus ID is invalid + - Authentication fails + - Service is unavailable """ try: url = urljoin(cls.backend_url, f"sdk/corpora/{corpus_id}/overview") @@ -154,19 +183,39 @@ def list( page_number: int = 0, page_size: int = 20, ) -> Dict: - """Corpus Listing + """List and filter corpora with pagination support. + + This method provides comprehensive filtering and pagination capabilities + for retrieving corpora from the aiXplain platform. Args: - query (Optional[Text], optional): search query. Defaults to None. - function (Optional[Function], optional): function filter. 
Defaults to None. - language (Optional[Union[Language, List[Language]]], optional): language filter. Defaults to None. - data_type (Optional[DataType], optional): data type filter. Defaults to None. - license (Optional[License], optional): license filter. Defaults to None. - page_number (int, optional): page number. Defaults to 0. - page_size (int, optional): page size. Defaults to 20. + query (Optional[Text], optional): Search query to filter corpora by name + or description. Defaults to None. + function (Optional[Function], optional): Filter by AI function type. + Defaults to None. + language (Optional[Union[Language, List[Language]]], optional): Filter by + language(s). Can be single language or list. Defaults to None. + data_type (Optional[DataType], optional): Filter by data type. + Defaults to None. + license (Optional[License], optional): Filter by license type. + Defaults to None. + page_number (int, optional): Zero-based page number. Defaults to 0. + page_size (int, optional): Number of items per page (1-100). + Defaults to 20. Returns: - Dict: list of corpora in agreement with the filters, page number, page total and total elements + Dict: Response containing: + - results (List[Corpus]): List of corpus objects + - page_total (int): Total items in current page + - page_number (int): Current page number + - total (int): Total number of items across all pages + + Raises: + Exception: If: + - page_size is not between 1 and 100 + - Request fails + - Service is unavailable + AssertionError: If page_size is invalid. """ url = urljoin(cls.backend_url, "sdk/corpora/paginate") @@ -229,15 +278,24 @@ def list( def get_assets_from_page( cls, page_number: int = 1, task: Optional[Function] = None, language: Optional[Text] = None ) -> List[Corpus]: - """Get the list of corpora from a given page. Additional task and language filters can be also be provided + """Retrieve a paginated list of corpora with optional filters. + + Note: + This method is deprecated. 
Use list() instead. Args: - page_number (int, optional): Page from which corpora are to be listed. Defaults to 1. - task (Function, optional): Task of listed corpora. Defaults to None. - language (Text, optional): language of listed corpora. Defaults to None. + page_number (int, optional): One-based page number. Defaults to 1. + task (Optional[Function], optional): Filter by AI task/function. + Defaults to None. + language (Optional[Text], optional): Filter by language code. + Defaults to None. Returns: - List[Corpus]: List of corpora based on given filters + List[Corpus]: List of corpus objects matching the filters. + + Deprecated: + Use list() method instead for more comprehensive filtering and + pagination capabilities. """ warn( 'This method will be deprecated in the next versions of the SDK. Use "list" instead.', @@ -263,23 +321,45 @@ def create( error_handler: ErrorHandler = ErrorHandler.SKIP, api_key: Optional[Text] = None, ) -> Dict: - """Asynchronous call to Upload a corpus to the user's dashboard. + """Create a new corpus from data files. + + This method asynchronously uploads and processes data files to create a new + corpus in the user's dashboard. The data files are processed according to + the provided schema and combined with any referenced existing data. Args: - name (Text): corpus name - description (Text): corpus description - license (License): corpus license - content_path (Union[Union[Text, Path], List[Union[Text, Path]]]): path to .csv files containing the data - schema (List[Union[Dict, MetaData]]): meta data - ref_data (Optional[List[Union[Text, Data]]], optional): referencing data which already exists and should be part of the corpus. Defaults to []. - tags (Optional[List[Text]], optional): tags that explain the corpus. Defaults to []. - functions (Optional[List[Function]], optional): AI functions for which the corpus may be used. Defaults to []. - privacy (Optional[Privacy], optional): visibility of the corpus. Defaults to Privacy.PRIVATE. 
- error_handler (ErrorHandler, optional): how to handle failed rows in the data asset. Defaults to ErrorHandler.SKIP. - api_key (Optional[Text]): team api key. Defaults to None. + name (Text): Name for the new corpus. + description (Text): Description of the corpus's contents and purpose. + license (License): License type for the corpus. + content_path (Union[Union[Text, Path], List[Union[Text, Path]]]): Path(s) + to CSV files containing the data. Can be single path or list. + schema (List[Union[Dict, MetaData]]): Metadata configurations defining + how to process the data files. + ref_data (List[Any], optional): References to existing data assets to + include in the corpus. Can be Data objects or IDs. Defaults to []. + tags (List[Text], optional): Tags describing the corpus content. + Defaults to []. + functions (List[Function], optional): AI functions this corpus is + suitable for. Defaults to []. + privacy (Privacy, optional): Visibility setting for the corpus. + Defaults to Privacy.PRIVATE. + error_handler (ErrorHandler, optional): Strategy for handling data + processing errors. Defaults to ErrorHandler.SKIP. + api_key (Optional[Text], optional): API key for authentication. + Defaults to None, using the configured TEAM_API_KEY. Returns: - Dict: response dict + Dict: Response containing: + - status: Current processing status + - asset_id: ID of the created corpus + + Raises: + Exception: If: + - No schema or reference data provided + - Referenced data asset doesn't exist + - Reserved column names are used + - Data rows are misaligned + - Processing or upload fails """ folder, return_dict = None, {} # check team key diff --git a/aixplain/factories/data_factory.py b/aixplain/factories/data_factory.py index 65aa8a87..385be4e7 100644 --- a/aixplain/factories/data_factory.py +++ b/aixplain/factories/data_factory.py @@ -36,23 +36,38 @@ class DataFactory(AssetFactory): - """A static class for creating and exploring Dataset Objects. 
+ """Factory class for creating and managing data assets. + + This class provides functionality for creating, retrieving, and managing + data assets in the aiXplain platform. Data assets represent individual + pieces of data (e.g., text, audio) that can be used in corpora or + directly with models. Attributes: - backend_url (str): The URL for the backend. + backend_url (str): Base URL for the aiXplain backend API. """ backend_url = config.BACKEND_URL @classmethod def __from_response(cls, response: Dict) -> Data: - """Converts Json response to 'Data' object + """Convert API response into a Data object. + + This method creates a Data object from an API response, handling the + conversion of languages, data types, and other attributes. Args: - response (dict): Json from API + response (Dict): API response containing: + - id: Data asset identifier + - name: Data asset name + - dataType: Type of data (e.g., text, audio) + - dataSubtype: Subtype of data + - metadata: Dictionary containing language configurations + - status: Onboarding status + - segmentsCount: Optional number of segments Returns: - Data: Converted 'Data' object + Data: Instantiated data asset object with all attributes set. """ languages = [] if "languages" in response["metadata"]: @@ -78,13 +93,22 @@ def __from_response(cls, response: Dict) -> Data: @classmethod def get(cls, data_id: Text) -> Data: - """Create a 'Data' object from dataset id + """Retrieve a data asset by its ID. + + This method fetches a data asset from the platform using its unique + identifier. Args: - data_id (Text): Data ID of required dataset. + data_id (Text): Unique identifier of the data asset to retrieve. Returns: - Data: Created 'Data' object + Data: Retrieved data asset object with its configuration. 
+ + Raises: + Exception: If: + - Data asset ID is invalid or not found + - Authentication fails + - Service is unavailable """ url = urljoin(cls.backend_url, f"sdk/data/{data_id}/overview") diff --git a/aixplain/factories/dataset_factory.py b/aixplain/factories/dataset_factory.py index 3b86b45e..e431d539 100644 --- a/aixplain/factories/dataset_factory.py +++ b/aixplain/factories/dataset_factory.py @@ -52,23 +52,44 @@ class DatasetFactory(AssetFactory): - """A static class for creating and exploring Dataset Objects. + """Factory class for creating and managing datasets in the aiXplain platform. + + This class provides functionality for creating, retrieving, and managing + datasets, which are structured collections of data assets used for training, + evaluating, and benchmarking AI models. Datasets can include input data, + target data, hypotheses, and metadata. Attributes: - backend_url (str): The URL for the backend. + backend_url (str): Base URL for the aiXplain backend API. """ backend_url = config.BACKEND_URL @classmethod def __from_response(cls, response: Dict) -> Dataset: - """Converts response Json to 'Dataset' object + """Convert API response into a Dataset object. + + This method creates a Dataset object from an API response, handling the + conversion of data assets, languages, functions, and other attributes. + It processes input data, hypotheses, metadata, and target data separately. 
Args: - response (dict): Json from API + response (Dict): API response containing: + - id: Dataset identifier + - name: Dataset name + - description: Dataset description + - data: List of data asset configurations + - input: Input data configurations + - hypotheses: Optional hypotheses configurations + - metadata: Metadata configurations + - output: Output/target data configurations + - function: Function identifier + - license: License configuration + - status: Onboarding status + - segmentsCount: Optional number of segments Returns: - Dataset: Coverted 'Dataset' object + Dataset: Instantiated dataset object with all components loaded. """ # process data data = {} @@ -154,13 +175,22 @@ def __from_response(cls, response: Dict) -> Dataset: @classmethod def get(cls, dataset_id: Text) -> Dataset: - """Create a 'Dataset' object from dataset id + """Retrieve a dataset by its ID. + + This method fetches a dataset and all its associated data assets from + the platform. Args: - dataset_id (Text): Dataset ID of required dataset. + dataset_id (Text): Unique identifier of the dataset to retrieve. Returns: - Dataset: Created 'Dataset' object + Dataset: Retrieved dataset object with all components loaded. + + Raises: + Exception: If: + - Dataset ID is invalid + - Authentication fails + - Service is unavailable """ try: url = urljoin(cls.backend_url, f"sdk/datasets/{dataset_id}/overview") @@ -195,21 +225,45 @@ def list( page_number: int = 0, page_size: int = 20, ) -> Dict: - """Listing Datasets + """List and filter datasets with pagination support. + + This method provides comprehensive filtering and pagination capabilities + for retrieving datasets from the aiXplain platform. Args: - query (Optional[Text], optional): search query. Defaults to None. - function (Optional[Function], optional): function filter. Defaults to None. - source_languages (Optional[Union[Language, List[Language]]], optional): language filter of input data. Defaults to None. 
- target_languages (Optional[Union[Language, List[Language]]], optional): language filter of output data. Defaults to None. - data_type (Optional[DataType], optional): data type filter. Defaults to None. - license (Optional[License], optional): license filter. Defaults to None. - is_referenceless (Optional[bool], optional): has reference filter. Defaults to None. - page_number (int, optional): page number. Defaults to 0. - page_size (int, optional): page size. Defaults to 20. + query (Optional[Text], optional): Search query to filter datasets by name + or description. Defaults to None. + function (Optional[Function], optional): Filter by AI function type. + Defaults to None. + source_languages (Optional[Union[Language, List[Language]]], optional): + Filter by input data language(s). Can be single language or list. + Defaults to None. + target_languages (Optional[Union[Language, List[Language]]], optional): + Filter by output data language(s). Can be single language or list. + Defaults to None. + data_type (Optional[DataType], optional): Filter by data type. + Defaults to None. + license (Optional[License], optional): Filter by license type. + Defaults to None. + is_referenceless (Optional[bool], optional): Filter by whether dataset + has references. Defaults to None. + page_number (int, optional): Zero-based page number. Defaults to 0. + page_size (int, optional): Number of items per page (1-100). + Defaults to 20. Returns: - Dict: list of datasets in agreement with the filters, page number, page total and total elements + Dict: Response containing: + - results (List[Dataset]): List of dataset objects + - page_total (int): Total items in current page + - page_number (int): Current page number + - total (int): Total number of items across all pages + + Raises: + Exception: If: + - page_size is not between 1 and 100 + - Request fails + - Service is unavailable + AssertionError: If page_size is invalid. 
""" url = urljoin(cls.backend_url, "sdk/datasets/paginate") @@ -298,30 +352,66 @@ def create( aws_credentials: Optional[Dict[Text, Text]] = {"AWS_ACCESS_KEY_ID": None, "AWS_SECRET_ACCESS_KEY": None}, api_key: Optional[Text] = None, ) -> Dict: - """Dataset Onboard + """Create a new dataset from data files and references. + + This method processes data files and existing data assets to create a new + dataset in the platform. It supports various data types, multiple input and + output configurations, and optional data splitting. Args: - name (Text): dataset name - description (Text): dataset description - license (License): dataset license - function (Function): dataset function - input_schema (List[Union[Dict, MetaData]]): metadata of inputs - output_schema (List[Union[Dict, MetaData]]): metadata of outputs - hypotheses_schema (List[Union[Dict, MetaData]], optional): schema of the hypotheses to the references. Defaults to []. - metadata_schema (List[Union[Dict, MetaData]], optional): metadata of metadata information of the dataset. Defaults to []. - content_path (Union[Union[Text, Path], List[Union[Text, Path]]]): path to files which contain the data content - input_ref_data (Dict[Text, Any], optional): reference to input data which is already in the platform. Defaults to {}. - output_ref_data (Dict[Text, List[Any]], optional): reference to output data which is already in the platform. Defaults to {}. - hypotheses_ref_data (Dict[Text, Any], optional): hypotheses which are already in the platform. Defaults to {}. - meta_ref_data (Dict[Text, Any], optional): metadata which is already in the platform. Defaults to {}. - tags (List[Text], optional): datasets description tags. Defaults to []. - privacy (Privacy, optional): dataset privacy. Defaults to Privacy.PRIVATE. - error_handler (ErrorHandler, optional): how to handle failed rows in the data asset. Defaults to ErrorHandler.SKIP. 
- s3_link (Optional[Text]): s3 url to files or directories - aws_credentials (Optional[Dict[Text, Text]]) : credentials for AWS and it should contains these two keys `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` - api_key (Optional[Text]): team api key. Defaults to None. + name (Text): Name for the new dataset. + description (Text): Description of the dataset's contents and purpose. + license (License): License type for the dataset. + function (Function): AI function this dataset is suitable for. + input_schema (List[Union[Dict, MetaData]]): Metadata configurations for + input data processing. + output_schema (List[Union[Dict, MetaData]], optional): Metadata configs + for output/target data. Defaults to []. + hypotheses_schema (List[Union[Dict, MetaData]], optional): Metadata + configs for hypothesis data. Defaults to []. + metadata_schema (List[Union[Dict, MetaData]], optional): Additional + metadata configurations. Defaults to []. + content_path (Union[Union[Text, Path], List[Union[Text, Path]]], optional): + Path(s) to data files. Can be single path or list. Defaults to []. + input_ref_data (Dict[Text, Any], optional): References to existing + input data assets. Defaults to {}. + output_ref_data (Dict[Text, List[Any]], optional): References to + existing output data assets. Defaults to {}. + hypotheses_ref_data (Dict[Text, Any], optional): References to + existing hypothesis data. Defaults to {}. + meta_ref_data (Dict[Text, Any], optional): References to existing + metadata assets. Defaults to {}. + tags (List[Text], optional): Tags describing the dataset. + Defaults to []. + privacy (Privacy, optional): Visibility setting. + Defaults to Privacy.PRIVATE. + split_labels (Optional[List[Text]], optional): Labels for dataset + splits (e.g., ["train", "test"]). Defaults to None. + split_rate (Optional[List[float]], optional): Ratios for dataset + splits (must sum to 1). Defaults to None. 
+ error_handler (ErrorHandler, optional): Strategy for handling data + processing errors. Defaults to ErrorHandler.SKIP. + s3_link (Optional[Text], optional): S3 URL for data files. + Defaults to None. + aws_credentials (Optional[Dict[Text, Text]], optional): AWS credentials + with access_key_id and secret_access_key. Defaults to None values. + api_key (Optional[Text], optional): API key for authentication. + Defaults to None, using the configured TEAM_API_KEY. + Returns: - Dict: dataset onboard status + Dict: Response containing: + - status: Current processing status + - asset_id: ID of the created dataset + + Raises: + Exception: If: + - No input data is provided + - Referenced data asset doesn't exist + - Reserved column names are used + - Data rows are misaligned + - Split configuration is invalid + - Processing or upload fails + AssertionError: If split configuration is invalid. """ for lmd in (hypotheses_schema, input_schema, output_schema, metadata_schema): diff --git a/aixplain/factories/file_factory.py b/aixplain/factories/file_factory.py index e1acebf2..39f5f552 100644 --- a/aixplain/factories/file_factory.py +++ b/aixplain/factories/file_factory.py @@ -37,6 +37,14 @@ class FileFactory: + """Factory class for managing file uploads and storage in the aiXplain platform. + + This class provides functionality for uploading files to S3 storage, + checking storage types, and managing file links. It supports various file + types with different size limits and handles both temporary and permanent + storage. + """ + @classmethod def upload( cls, @@ -46,21 +54,39 @@ def upload( is_temp: bool = True, return_download_link: bool = False, ) -> Text: - """ - Uploads a file to an S3 bucket. + """Upload a file to the aiXplain S3 storage. 
+ + This method uploads a file to S3 storage with size limits based on file type: + - Audio: 50MB + - Application: 25MB + - Video: 300MB + - Image: 25MB + - Database: 300MB + - Other: 50MB Args: - local_path (Text): The local path of the file to upload. - tags (List[Text], optional): tags of the file - license (License, optional): the license for the file - is_temp (bool): specify if the file that will be upload is a temporary file - return_download_link (bool): specify if the function should return the download link of the file or the S3 path + local_path (Text): Path to the file to upload. + tags (Optional[List[Text]], optional): Tags to associate with the file. + Defaults to None. + license (Optional[License], optional): License type for the file. + Required for non-temporary files. Defaults to None. + is_temp (bool, optional): Whether this is a temporary upload. + Defaults to True. + return_download_link (bool, optional): Whether to return a download + link instead of S3 path. Only valid for temporary files. + Defaults to False. + Returns: - Text: The S3 path where the file was uploaded. + Text: Either: + - S3 path where the file was uploaded (if return_download_link=False) + - Download URL for the file (if return_download_link=True) Raises: - FileNotFoundError: If the local file is not found. - Exception: If the file size exceeds the maximum allowed size. + FileNotFoundError: If the local file doesn't exist. + Exception: If: + - File size exceeds the type-specific limit + - Requesting download link for non-temporary file + AssertionError: If requesting download link for non-temporary file. """ if is_temp is False: assert ( @@ -109,13 +135,19 @@ def upload( @classmethod def check_storage_type(cls, input_link: Any) -> StorageType: - """Check whether a path is a URL (s3 link or HTTP link), a file or a textual content + """Determine the storage type of a given input. 
+ + This method checks whether the input is a local file path, a URL + (including S3 and HTTP/HTTPS links), or raw text content. Args: - input_link (Any): path to be checked + input_link (Any): Input to check. Can be a file path, URL, or text. Returns: - StorageType: URL, TEXT or FILE + StorageType: Storage type enum value: + - StorageType.FILE: Local file path + - StorageType.URL: S3 or HTTP/HTTPS URL + - StorageType.TEXT: Raw text content """ if os.path.exists(input_link) is True and os.path.isfile(input_link) is True: return StorageType.FILE @@ -131,13 +163,21 @@ def check_storage_type(cls, input_link: Any) -> StorageType: @classmethod def to_link(cls, data: Union[Text, Dict], **kwargs) -> Union[Text, Dict]: - """If user input data is a local file, upload to aiXplain platform + """Convert local file paths to aiXplain platform links. + + This method checks if the input contains local file paths and uploads + them to the platform, replacing the paths with the resulting URLs. + Other types of input (URLs, text) are left unchanged. Args: - data (Union[Text, Dict]): input data + data (Union[Text, Dict]): Input data to process. Can be: + - Text: Single file path, URL, or text content + - Dict: Dictionary with string values that may be file paths + **kwargs: Additional arguments passed to upload() method. Returns: - Union[Text, Dict]: input links/texts + Union[Text, Dict]: Processed input where any local file paths have + been replaced with platform URLs. Structure matches input type. """ if isinstance(data, dict): for key in data: @@ -153,20 +193,29 @@ def to_link(cls, data: Union[Text, Dict], **kwargs) -> Union[Text, Dict]: def create( cls, local_path: Text, tags: Optional[List[Text]] = None, license: Optional[License] = None, is_temp: bool = False ) -> Text: - """ - Uploads a file to an S3 bucket. + """Create a permanent or temporary file asset in the platform. + + This method is similar to upload() but with a focus on creating file + assets. 
For permanent assets (is_temp=False), a license is required. Args: - local_path (Text): The local path of the file to upload. - tags (List[Text], optional): tags of the file - license (License, optional): the license for the file - is_temp (bool): specify if the file that will be upload is a temporary file + local_path (Text): Path to the file to upload. + tags (Optional[List[Text]], optional): Tags to associate with the file. + Defaults to None. + license (Optional[License], optional): License type for the file. + Required for non-temporary files. Defaults to None. + is_temp (bool, optional): Whether this is a temporary upload. + Defaults to False. + Returns: - Text: The S3 path where the file was uploaded. + Text: Either: + - S3 path for permanent files (is_temp=False) + - Download URL for temporary files (is_temp=True) Raises: - FileNotFoundError: If the local file is not found. - Exception: If the file size exceeds the maximum allowed size. + FileNotFoundError: If the local file doesn't exist. + Exception: If file size exceeds the type-specific limit. + AssertionError: If license is not provided for non-temporary files. """ assert ( license is not None if is_temp is False else True diff --git a/aixplain/factories/finetune_factory/__init__.py b/aixplain/factories/finetune_factory/__init__.py index b6006f0b..7189044f 100644 --- a/aixplain/factories/finetune_factory/__init__.py +++ b/aixplain/factories/finetune_factory/__init__.py @@ -38,23 +38,29 @@ class FinetuneFactory: - """A static class for creating and managing the FineTune experience. + """Factory class for creating and managing model fine-tuning operations. + + This class provides static methods to create and manage fine-tuning jobs + for machine learning models. It handles cost estimation, dataset preparation, + and fine-tuning configuration. Attributes: - backend_url (str): The URL for the backend. + backend_url (str): Base URL for the aiXplain backend API. 
""" backend_url = config.BACKEND_URL @classmethod def _create_cost_from_response(cls, response: Dict) -> FinetuneCost: - """Create a Cost object from the response dictionary. + """Create a FinetuneCost object from an API response. Args: - response (Dict): The response dictionary containing cost information. + response (Dict): API response dictionary containing cost information + with 'trainingCost', 'inferenceCost', and 'hostingCost' fields. Returns: - Cost: The Cost object created from the response. + FinetuneCost: Object containing the parsed cost information for + training, inference, and hosting. """ return FinetuneCost(response["trainingCost"], response["inferenceCost"], response["hostingCost"]) @@ -69,18 +75,31 @@ def create( train_percentage: Optional[float] = 100, dev_percentage: Optional[float] = 0, ) -> Finetune: - """Create a Finetune object with the provided information. + """Create a new fine-tuning job with the specified configuration. + + This method sets up a fine-tuning job by validating the configuration, + estimating costs, and preparing the datasets and model. It supports both + direct Dataset/Model objects and their IDs as inputs. Args: - name (Text): Name of the Finetune. - dataset_list (List[Dataset]): List of Datasets (or dataset IDs) to be used for fine-tuning. - model (Model): Model (Model ID) to be fine-tuned. - prompt_template (Text, optional): Fine-tuning prompt_template. Should reference columns in the dataset using format <>. Defaults to None. - hyperparameters (Hyperparameters, optional): Hyperparameters for fine-tuning. Defaults to None. - train_percentage (float, optional): Percentage of training samples. Defaults to 100. - dev_percentage (float, optional): Percentage of development samples. Defaults to 0. + name (Text): Name for the fine-tuning job. + dataset_list (List[Union[Dataset, Text]]): List of Dataset objects or dataset IDs + to use for fine-tuning. + model (Union[Model, Text]): Model object or model ID to be fine-tuned. 
+ prompt_template (Text, optional): Template for formatting training examples. + Use <> to reference dataset columns. Defaults to None. + hyperparameters (Hyperparameters, optional): Fine-tuning hyperparameters + configuration. Defaults to None. + train_percentage (float, optional): Percentage of data to use for training. + Must be > 0. Defaults to 100. + dev_percentage (float, optional): Percentage of data to use for validation. + train_percentage + dev_percentage must be <= 100. Defaults to 0. + Returns: - Finetune: The Finetune object created with the provided information or None if there was an error. + Finetune: Configured fine-tuning job object, or None if creation failed. + + Raises: + AssertionError: If train_percentage <= 0 or train_percentage + dev_percentage > 100. """ payload = {} assert train_percentage > 0, f"Create FineTune: Train percentage ({train_percentage}) must be greater than zero" diff --git a/aixplain/factories/finetune_factory/prompt_validator.py b/aixplain/factories/finetune_factory/prompt_validator.py index 2ed753e1..4db5b974 100644 --- a/aixplain/factories/finetune_factory/prompt_validator.py +++ b/aixplain/factories/finetune_factory/prompt_validator.py @@ -3,13 +3,46 @@ import re -def _get_data_list(dataset: Dataset): +def _get_data_list(dataset: Dataset) -> List: + """Extract and flatten source and target data from a dataset. + + This helper function combines source data and flattened target data from + a dataset into a single list. + + Args: + dataset (Dataset): Dataset object containing source and target data. + + Returns: + List: Combined list of source and target data items. + """ flatten_target_values = [item for sublist in list(dataset.target_data.values()) for item in sublist] data_list = list(dataset.source_data.values()) + flatten_target_values return data_list def validate_prompt(prompt: Text, dataset_list: List[Dataset]) -> Text: + """Validate and normalize a prompt template against a list of datasets. 
+ + This function processes a prompt template that contains references to dataset + columns in the format <> or <>. It validates that all + referenced columns exist in the provided datasets and normalizes column IDs + to their corresponding names. + + Args: + prompt (Text): Prompt template containing column references in + <> or <> format. + dataset_list (List[Dataset]): List of datasets to validate the + prompt template against. + + Returns: + Text: Normalized prompt template with column references converted + to {COLUMN_NAME} format. + + Raises: + AssertionError: If any of these conditions are met: + - Multiple datasets have the same referenced column name + - Referenced columns are not found in any dataset + """ result_prompt = prompt referenced_data = set(re.findall("<<(.+?)>>", prompt)) for dataset in dataset_list: diff --git a/aixplain/factories/index_factory/__init__.py b/aixplain/factories/index_factory/__init__.py index 2eacca55..8e7620c6 100644 --- a/aixplain/factories/index_factory/__init__.py +++ b/aixplain/factories/index_factory/__init__.py @@ -33,22 +33,32 @@ from aixplain.utils.file_utils import _request_with_retry from urllib.parse import urljoin -def validate_embedding_model(model_id) -> bool: - model = ModelFactory.get(model_id) - return model.function == Function.TEXT_EMBEDDING +def validate_embedding_model(model_id: Union[EmbeddingModel, str]) -> bool: + """Validate that a model is a text embedding model. + Args: + model_id (Union[EmbeddingModel, str]): The model ID or EmbeddingModel enum + value to validate. -def validate_embedding_model(model_id) -> bool: + Returns: + bool: True if the model is a text embedding model, False otherwise. 
+ """ model = ModelFactory.get(model_id) return model.function == Function.TEXT_EMBEDDING -def validate_embedding_model(model_id) -> bool: - model = ModelFactory.get(model_id) - return model.function == Function.TEXT_EMBEDDING +class IndexFactory(ModelFactory, Generic[T]): + """Factory class for creating and managing index collections. + This class extends ModelFactory to provide specialized functionality for + managing index collections, which are used for efficient data retrieval + and searching. It supports various index types through the generic + parameter T. -class IndexFactory(ModelFactory, Generic[T]): + Attributes: + T (TypeVar): Type variable bound to BaseIndexParams, representing + the specific index parameters type. + """ @classmethod def create( cls, @@ -58,7 +68,31 @@ def create( params: Optional[T] = None, **kwargs, ) -> IndexModel: - """Create a new index collection""" + """Create a new index collection for efficient data retrieval. + + This method supports two ways of creating an index: + 1. Using individual parameters (name, description, embedding_model) - Deprecated + 2. Using a params object of type T (recommended) + + Args: + name (Optional[Text], optional): Name of the index collection. + Deprecated, use params instead. Defaults to None. + description (Optional[Text], optional): Description of the index collection. + Deprecated, use params instead. Defaults to None. + embedding_model (Union[EmbeddingModel, str], optional): Model to use for text embeddings. + Deprecated, use params instead. Defaults to EmbeddingModel.OPENAI_ADA002. + params (Optional[T], optional): Index parameters object. This is the + recommended way to create an index. Defaults to None. + **kwargs: Additional keyword arguments. + + Returns: + IndexModel: Created index collection model. + + Raises: + AssertionError: If neither params nor all legacy parameters are provided, + or if both params and legacy parameters are provided. + Exception: If index creation fails. 
+ """ import warnings warnings.warn( @@ -106,7 +140,25 @@ def list( page_number: int = 0, page_size: int = 20, ) -> List[IndexModel]: - """List all indexes""" + """List available index collections with optional filtering and sorting. + + Args: + query (Optional[Text], optional): Search query to filter indexes. + Defaults to "". + suppliers (Optional[Union[Supplier, List[Supplier]]], optional): Filter by + supplier(s). Defaults to None. + ownership (Optional[Tuple[OwnershipType, List[OwnershipType]]], optional): + Filter by ownership type. Defaults to None. + sort_by (Optional[SortBy], optional): Field to sort results by. + Defaults to None. + sort_order (SortOrder, optional): Sort direction (ascending/descending). + Defaults to SortOrder.ASCENDING. + page_number (int, optional): Page number for pagination. Defaults to 0. + page_size (int, optional): Number of results per page. Defaults to 20. + + Returns: + List[IndexModel]: List of index models matching the specified criteria. + """ return super().list( function=Function.SEARCH, query=query, diff --git a/aixplain/factories/index_factory/utils.py b/aixplain/factories/index_factory/utils.py index b09df3c0..fd3cbcf6 100644 --- a/aixplain/factories/index_factory/utils.py +++ b/aixplain/factories/index_factory/utils.py @@ -7,11 +7,29 @@ class BaseIndexParams(BaseModel, ABC): + """Abstract base class for index parameters. + + This class defines the common parameters and functionality for all index types. + It uses Pydantic for data validation and serialization. + + Attributes: + model_config (ConfigDict): Pydantic configuration using enum values. + name (Text): Name of the index. + description (Optional[Text]): Description of the index. Defaults to "". + """ model_config = ConfigDict(use_enum_values=True) name: Text description: Optional[Text] = "" - def to_dict(self): + def to_dict(self) -> Dict: + """Convert the parameters to a dictionary format. 
+ + Converts the parameters to a dictionary suitable for API requests, + renaming 'name' to 'data' in the process. + + Returns: + Dict: Dictionary representation of the parameters. + """ data = self.model_dump(exclude_none=True) data["data"] = data.pop("name") return data @@ -24,11 +42,33 @@ def id(self) -> str: class BaseIndexParamsWithEmbeddingModel(BaseIndexParams, ABC): + """Abstract base class for index parameters that require an embedding model. + + This class extends BaseIndexParams to add support for embedding model configuration, + including model selection and embedding size settings. + + Attributes: + embedding_model (Optional[Union[EmbeddingModel, str]]): Model to use for text + embeddings. Defaults to EmbeddingModel.OPENAI_ADA002. + embedding_size (Optional[int]): Size of the embeddings to generate. + Defaults to None. + """ embedding_model: Optional[Union[EmbeddingModel, str]] = EmbeddingModel.OPENAI_ADA002 embedding_size: Optional[int] = None @field_validator('embedding_model') - def validate_embedding_model(cls, model_id) -> bool: + def validate_embedding_model(cls, model_id) -> str: + """Validate that the provided model is a text embedding model. + + Args: + model_id (Union[EmbeddingModel, str]): Model ID or enum value to validate. + + Returns: + str: The validated model ID. + + Raises: + ValueError: If the model is not a text embedding model. + """ model = ModelFactory.get(model_id) if model.function == Function.TEXT_EMBEDDING: return model_id @@ -36,7 +76,16 @@ def validate_embedding_model(cls, model_id) -> bool: raise ValueError("This is not an embedding model") - def to_dict(self): + def to_dict(self) -> Dict: + """Convert the parameters to a dictionary format. + + Extends the base to_dict method to handle embedding-specific parameters, + renaming fields and restructuring as needed for the API. + + Returns: + Dict: Dictionary representation of the parameters with embedding + configuration properly formatted. 
+ """ data = super().to_dict() data["model"] = data.pop("embedding_model") @@ -49,35 +98,86 @@ def to_dict(self): class VectaraParams(BaseIndexParams): + """Parameters for creating a Vectara index. + + This class defines the configuration for Vectara's vector search index. + + Attributes: + _id (ClassVar[str]): Static model ID for Vectara index type. + """ _id: ClassVar[str] = IndexStores.VECTARA.get_model_id() @property def id(self) -> str: + """Get the model ID for Vectara index type. + + Returns: + str: The Vectara model ID. + """ return self._id class ZeroEntropyParams(BaseIndexParams): + """Parameters for creating a Zero Entropy index. + + This class defines the configuration for Zero Entropy's vector search index. + + Attributes: + _id (ClassVar[str]): Static model ID for Zero Entropy index type. + """ _id: ClassVar[str] = IndexStores.ZERO_ENTROPY.get_model_id() @property def id(self) -> str: + """Get the model ID for Zero Entropy index type. + + Returns: + str: The Zero Entropy model ID. + """ return self._id class AirParams(BaseIndexParamsWithEmbeddingModel): + """Parameters for creating an AIR (aiXplain Index and Retrieval) index. + + This class defines the configuration for AIR's vector search index, + including embedding model settings. + + Attributes: + _id (ClassVar[str]): Static model ID for AIR index type. + """ _id: ClassVar[str] = IndexStores.AIR.get_model_id() @property def id(self) -> str: + """Get the model ID for AIR index type. + + Returns: + str: The AIR model ID. + """ return self._id class GraphRAGParams(BaseIndexParamsWithEmbeddingModel): + """Parameters for creating a GraphRAG (Graph-based Retrieval-Augmented Generation) index. + + This class defines the configuration for GraphRAG's vector search index, + including embedding model and LLM settings. + + Attributes: + _id (ClassVar[str]): Static model ID for GraphRAG index type. + llm (Optional[Text]): ID of the LLM to use for generation. Defaults to None. 
+ """ _id: ClassVar[str] = IndexStores.GRAPHRAG.get_model_id() llm: Optional[Text] = None @property def id(self) -> str: + """Get the model ID for GraphRAG index type. + + Returns: + str: The GraphRAG model ID. + """ return self._id diff --git a/aixplain/factories/integration_factory.py b/aixplain/factories/integration_factory.py index 2a4a397a..5ac09f34 100644 --- a/aixplain/factories/integration_factory.py +++ b/aixplain/factories/integration_factory.py @@ -9,10 +9,31 @@ class IntegrationFactory(ModelGetterMixin, ModelListMixin): + """Factory class for creating and managing Integration models. + + This class provides functionality to get and list Integration models using the backend API. + It inherits from ModelGetterMixin and ModelListMixin to provide model retrieval and listing capabilities. + + Attributes: + backend_url: The URL of the backend API endpoint. + """ backend_url = config.BACKEND_URL @classmethod def get(cls, model_id: Text, api_key: Optional[Text] = None, use_cache: bool = False) -> Integration: + """Retrieves a specific Integration model by its ID. + + Args: + model_id (Text): The unique identifier of the Integration model. + api_key (Optional[Text], optional): API key for authentication. Defaults to None. + use_cache (bool, optional): Whether to use cached data. Defaults to False. + + Returns: + Integration: The retrieved Integration model. + + Raises: + AssertionError: If the provided ID does not correspond to an Integration model. + """ model = super().get(model_id=model_id, api_key=api_key) assert isinstance(model, Integration), f"The provided ID ('{model_id}') is not from an integration model" return model @@ -29,6 +50,21 @@ def list( page_size: int = 20, api_key: Optional[Text] = None, ) -> List[Integration]: + """Lists Integration models based on the provided filters and pagination parameters. + + Args: + query (Optional[Text], optional): Search query string. Defaults to "". 
+ suppliers (Optional[Union[Supplier, List[Supplier]]], optional): Filter by supplier(s). Defaults to None. + ownership (Optional[Tuple[OwnershipType, List[OwnershipType]]], optional): Filter by ownership type. Defaults to None. + sort_by (Optional[SortBy], optional): Field to sort results by. Defaults to None. + sort_order (SortOrder, optional): Sort order (ascending/descending). Defaults to SortOrder.ASCENDING. + page_number (int, optional): Page number for pagination. Defaults to 0. + page_size (int, optional): Number of items per page. Defaults to 20. + api_key (Optional[Text], optional): API key for authentication. Defaults to None. + + Returns: + List[Integration]: A list of Integration models matching the specified criteria. + """ return super().list( function=Function.CONNECTOR, query=query, diff --git a/aixplain/factories/metric_factory.py b/aixplain/factories/metric_factory.py index 6279ffc1..38e228b6 100644 --- a/aixplain/factories/metric_factory.py +++ b/aixplain/factories/metric_factory.py @@ -31,23 +31,27 @@ class MetricFactory: - """A static class for creating and exploring Metric Objects. + """A static factory class for creating and managing Metric objects. + + This class provides functionality to create, retrieve, and list Metric objects + through the backend API. It includes methods for fetching individual metrics + by ID and listing metrics with various filtering options. Attributes: - backend_url (str): The URL for the backend. + backend_url (str): The URL endpoint for the backend API. """ backend_url = config.BACKEND_URL @classmethod def _create_metric_from_response(cls, response: Dict) -> Metric: - """Converts response Json to 'Metric' object + """Converts response JSON to a Metric object. Args: - response (Dict): Json from API + response (Dict): JSON response from the API containing metric data. Returns: - Metric: Coverted 'Metric' object + Metric: A new Metric object created from the response data. 
""" return Metric( id=response["id"], @@ -61,13 +65,16 @@ def _create_metric_from_response(cls, response: Dict) -> Metric: @classmethod def get(cls, metric_id: Text) -> Metric: - """Create a 'Metric' object from metric id + """Create a Metric object from a metric ID. Args: - model_id (Text): Model ID of required metric. + metric_id (Text): The unique identifier of the metric to retrieve. Returns: - Metric: Created 'Metric' object + Metric: The retrieved Metric object. + + Raises: + Exception: If the metric creation fails, with status code and error message. """ resp, status_code = None, 200 @@ -98,17 +105,24 @@ def list( page_number: int = 0, page_size: int = 20, ) -> List[Metric]: - """Get list of supported metrics for the given filters + """Get a list of supported metrics based on the given filters. Args: - model_id (Text, optional): ID of model for which metric is to be used. Defaults to None. - is_source_required (bool, optional): Should the metric use source. Defaults to None. - is_reference_required (bool, optional): Should the metric use reference. Defaults to None. - page_number (int, optional): page number. Defaults to 0. - page_size (int, optional): page size. Defaults to 20. + model_id (Text, optional): ID of model for which metrics are to be used. Defaults to None. + is_source_required (bool, optional): Filter metrics that require source input. Defaults to None. + is_reference_required (bool, optional): Filter metrics that require reference input. Defaults to None. + page_number (int, optional): Page number for pagination. Defaults to 0. + page_size (int, optional): Number of items per page. Defaults to 20. 
Returns: - List[Metric]: List of supported metrics + Dict: A dictionary containing: + - results (List[Metric]): List of filtered metrics + - page_total (int): Number of items in the current page + - page_number (int): Current page number + - total (int): Total number of items matching the filters + + Raises: + Exception: If there is an error retrieving the metrics list. """ try: url = urljoin(cls.backend_url, "sdk/metrics") diff --git a/aixplain/factories/model_factory/__init__.py b/aixplain/factories/model_factory/__init__.py index 2e1dc1da..8e4ac918 100644 --- a/aixplain/factories/model_factory/__init__.py +++ b/aixplain/factories/model_factory/__init__.py @@ -31,10 +31,14 @@ from typing import Callable, Dict, List, Optional, Text, Union class ModelFactory(ModelGetterMixin, ModelListMixin): - """A static class for creating and exploring Model Objects. + """Factory class for creating, managing, and exploring models. + + This class provides functionality for creating various types of models, + managing model repositories, and interacting with the aiXplain platform's + model-related features. Attributes: - backend_url (str): The URL for the backend. + backend_url (str): Base URL for the aiXplain backend API. """ backend_url = config.BACKEND_URL @@ -49,17 +53,29 @@ def create_utility_model( output_examples: Text = "", api_key: Optional[Text] = None, ) -> UtilityModel: - """Create a utility model + """Create a new utility model for custom functionality. + + This method creates a utility model that can execute custom code or functions + with specified inputs and outputs. Args: - name (Text): name of the model - code (Union[Text, Callable]): code of the model - description (Text, optional): description of the model - inputs (List[UtilityModelInput], optional): inputs of the model - output_examples (Text, optional): output examples - api_key (Text, optional): Team API key. Defaults to None. + name (Optional[Text]): Name of the utility model. 
+ code (Union[Text, Callable]): Python code as string or callable function + implementing the model's functionality. + inputs (List[UtilityModelInput], optional): List of input specifications. + Defaults to empty list. + description (Optional[Text], optional): Description of what the model does. + Defaults to None. + output_examples (Text, optional): Examples of expected outputs. + Defaults to empty string. + api_key (Optional[Text], optional): API key for authentication. + Defaults to None, using the configured TEAM_API_KEY. + Returns: - UtilityModel: created utility model + UtilityModel: Created and registered utility model instance. + + Raises: + Exception: If model creation fails or validation fails. """ api_key = config.TEAM_API_KEY if api_key is None else api_key utility_model = UtilityModel( @@ -200,22 +216,29 @@ def create_asset_repo( documentation_url: Optional[Text] = "", api_key: Optional[Text] = None, ) -> Dict: - """Creates an image repository for this model and registers it in the - platform backend. + """Create a new model repository in the platform. + + This method creates and registers a new model repository, setting up the + necessary infrastructure for model deployment. Args: - name (Text): Model name - hosting_machine (Text): Hosting machine ID obtained via list_host_machines - always_on (bool): Whether the model should always be on - version (Text): Model version - description (Text): Model description - function (Text): Model function name obtained via LIST_HOST_MACHINES - is_async (bool): Whether the model is asynchronous or not (False in first release) - source_language (Text): 2-character 639-1 code or 3-character 639-3 language code. - api_key (Text, optional): Team API key. Defaults to None. + name (Text): Name of the model. + description (Text): Description of the model's functionality. + function (Text): Function name from list_functions() defining model's task. 
+ source_language (Text): Language code in ISO 639-1 (2-char) or 639-3 (3-char) format. + input_modality (Text): Type of input the model accepts (e.g., text, audio). + output_modality (Text): Type of output the model produces (e.g., text, audio). + documentation_url (Optional[Text], optional): URL to model documentation. + Defaults to empty string. + api_key (Optional[Text], optional): API key for authentication. + Defaults to None, using the configured TEAM_API_KEY. Returns: - Dict: Backend response + Dict: Repository creation response containing model ID and other details. + + Raises: + Exception: If function name is invalid. + AssertionError: If response status code is not 201. """ # Reconcile function name to be function ID in the backend function_list = cls.list_functions(True, config.TEAM_API_KEY)["items"] @@ -334,15 +357,23 @@ def deploy_huggingface_model( hf_token: Optional[Text] = "", api_key: Optional[Text] = None, ) -> Dict: - """Onboards and deploys a Hugging Face large language model. + """Deploy a model from Hugging Face Hub to the aiXplain platform. + + This method handles the deployment of a Hugging Face model, including + authentication and configuration setup. Args: - name (Text): The user's name for the model. - hf_repo_id (Text): The Hugging Face repository ID for this model ({author}/{model name}). - hf_token (Text, optional): Hugging Face access token. Defaults to None. - api_key (Text, optional): Team API key. Defaults to None. + name (Text): Display name for the deployed model. + hf_repo_id (Text): Hugging Face repository ID in 'author/model-name' format. + revision (Optional[Text], optional): Specific model revision/commit hash. + Defaults to empty string (latest version). + hf_token (Optional[Text], optional): Hugging Face access token for private models. + Defaults to empty string. + api_key (Optional[Text], optional): API key for authentication. + Defaults to None, using the configured TEAM_API_KEY. 
+ Returns: - Dict: Backend response + Dict: Deployment response containing model ID and status information. """ supplier, model_name = hf_repo_id.split("/") deploy_url = urljoin(config.BACKEND_URL, "sdk/model-onboarding/onboard") @@ -381,14 +412,23 @@ def deploy_huggingface_model( @classmethod def get_huggingface_model_status( cls, model_id: Text, api_key: Optional[Text] = None - ): - """Gets the on-boarding status of a Hugging Face model with ID MODEL_ID. + ) -> Dict: + """Check the deployment status of a Hugging Face model. + + This method retrieves the current status and details of a deployed + Hugging Face model. Args: - model_id (Text): The model's ID as returned by DEPLOY_HUGGINGFACE_MODEL - api_key (Text, optional): Team API key. Defaults to None. + model_id (Text): Model ID returned by deploy_huggingface_model. + api_key (Optional[Text], optional): API key for authentication. + Defaults to None, using the configured TEAM_API_KEY. + Returns: - Dict: Backend response + Dict: Status response containing: + - status: Current deployment status + - name: Model name + - id: Model ID + - pricing: Pricing information """ status_url = urljoin(config.BACKEND_URL, f"sdk/models/{model_id}") if api_key: diff --git a/aixplain/factories/model_factory/mixins/model_getter.py b/aixplain/factories/model_factory/mixins/model_getter.py index 79632146..8f5048c6 100644 --- a/aixplain/factories/model_factory/mixins/model_getter.py +++ b/aixplain/factories/model_factory/mixins/model_getter.py @@ -10,9 +10,31 @@ class ModelGetterMixin: + """Mixin class providing model retrieval functionality. + + This mixin provides methods for retrieving model instances from the backend, + with support for caching to improve performance. + """ @classmethod def get(cls, model_id: Text, api_key: Optional[Text] = None, use_cache: bool = False) -> Model: - """Create a 'Model' object from model id""" + """Retrieve a model instance by its ID. 
+ + This method attempts to retrieve a model from the cache if enabled, + falling back to fetching from the backend if necessary. + + Args: + model_id (Text): ID of the model to retrieve. + api_key (Optional[Text], optional): API key for authentication. + Defaults to None, using the configured TEAM_API_KEY. + use_cache (bool, optional): Whether to attempt retrieving from cache. + Defaults to False. + + Returns: + Model: Retrieved model instance. + + Raises: + Exception: If the model cannot be retrieved or doesn't exist. + """ model_id = model_id.replace("/", "%2F") cache = AssetCache(Model) @@ -44,6 +66,22 @@ def get(cls, model_id: Text, api_key: Optional[Text] = None, use_cache: bool = F @classmethod def _fetch_model_by_id(cls, model_id: Text, api_key: Optional[Text] = None) -> Model: + """Fetch a model directly from the backend by its ID. + + This internal method handles the direct API communication to retrieve + a model's details from the backend. + + Args: + model_id (Text): ID of the model to fetch. + api_key (Optional[Text], optional): API key for authentication. + Defaults to None, using the configured TEAM_API_KEY. + + Returns: + Model: Fetched model instance. + + Raises: + Exception: If the API request fails or returns an error. + """ resp = None try: url = urljoin(cls.backend_url, f"sdk/models/{model_id}") diff --git a/aixplain/factories/model_factory/mixins/model_list.py b/aixplain/factories/model_factory/mixins/model_list.py index a54de035..a8158fbb 100644 --- a/aixplain/factories/model_factory/mixins/model_list.py +++ b/aixplain/factories/model_factory/mixins/model_list.py @@ -5,6 +5,11 @@ class ModelListMixin: + """Mixin class providing model listing functionality. + + This mixin provides methods for retrieving lists of models with various + filtering and sorting options. 
+ """ @classmethod def list( cls, @@ -21,24 +26,50 @@ def list( page_size: int = 20, model_ids: Optional[List[Text]] = None, api_key: Optional[Text] = None, - ) -> List[Model]: - """Gets the first k given models based on the provided task and language filters + ) -> dict: + """List and filter available models with pagination support. + + This method provides comprehensive filtering capabilities for retrieving + models. It supports two modes: + 1. Filtering by model IDs (exclusive of other filters) + 2. Filtering by various criteria (function, language, etc.) Args: - query (Optional[Text], optional): query filter. Defaults to "". - function (Function): function filter. - source_languages (Optional[Union[Language, List[Language]]], optional): language filter of input data. Defaults to None. - target_languages (Optional[Union[Language, List[Language]]], optional): language filter of output data. Defaults to None. - is_finetunable (Optional[bool], optional): can be finetuned or not. Defaults to None. - ownership (Optional[Tuple[OwnershipType, List[OwnershipType]]], optional): Ownership filters (e.g. SUBSCRIBED, OWNER). Defaults to None. - sort_by (Optional[SortBy], optional): sort the retrived models by a specific attribute, - page_number (int, optional): page number. Defaults to 0. - page_size (int, optional): page size. Defaults to 20. - model_ids (Optional[List[Text]], optional): model ids to filter. Defaults to None. - api_key (Optional[Text], optional): Team API key. Defaults to None. + query (Optional[Text], optional): Search query to filter models. + Defaults to "". + function (Optional[Function], optional): Filter by model function/task. + Defaults to None. + suppliers (Optional[Union[Supplier, List[Supplier]]], optional): Filter by + supplier(s). Defaults to None. + source_languages (Optional[Union[Language, List[Language]]], optional): + Filter by input language(s). Defaults to None. 
+ target_languages (Optional[Union[Language, List[Language]]], optional): + Filter by output language(s). Defaults to None. + is_finetunable (Optional[bool], optional): Filter by fine-tuning capability. + Defaults to None. + ownership (Optional[Tuple[OwnershipType, List[OwnershipType]]], optional): + Filter by ownership type (e.g., SUBSCRIBED, OWNER). Defaults to None. + sort_by (Optional[SortBy], optional): Attribute to sort results by. + Defaults to None. + sort_order (SortOrder, optional): Sort direction (ascending/descending). + Defaults to SortOrder.ASCENDING. + page_number (int, optional): Page number for pagination. Defaults to 0. + page_size (int, optional): Number of results per page. Defaults to 20. + model_ids (Optional[List[Text]], optional): List of specific model IDs to retrieve. + If provided, other filters are ignored. Defaults to None. + api_key (Optional[Text], optional): API key for authentication. + Defaults to None, using the configured TEAM_API_KEY. Returns: - List[Model]: List of models based on given filters + dict: Dictionary containing: + - results (List[Model]): List of models matching the criteria + - page_total (int): Number of models in current page + - page_number (int): Current page number + - total (int): Total number of models matching the criteria + + Raises: + AssertionError: If model_ids is provided with other filters, or if + page_size is less than the number of requested model_ids. """ if model_ids is not None: assert len(model_ids) > 0, "Please provide at least one model id" diff --git a/aixplain/factories/model_factory/utils.py b/aixplain/factories/model_factory/utils.py index 9f32e51f..2cd049ee 100644 --- a/aixplain/factories/model_factory/utils.py +++ b/aixplain/factories/model_factory/utils.py @@ -18,13 +18,29 @@ def create_model_from_response(response: Dict) -> Model: - """Converts response Json to 'Model' object + """Convert API response JSON into appropriate Model object. 
+ + This function creates the correct type of Model object (Model, LLM, IndexModel, + Integration, ConnectionTool, MCPConnection, or UtilityModel) based on the + function type and parameters in the response. Args: - response (Dict): Json from API + response (Dict): API response containing model information including: + - id: Model identifier + - name: Model name + - function: Function type information + - params: Model parameters + - api_key: Optional API key + - attributes: Optional model attributes + - code: Optional model code + - version: Optional version information Returns: - Model: Coverted 'Model' object + Model: Instantiated model object of the appropriate subclass based on + the function type. + + Raises: + Exception: If required code is not found for UtilityModel. """ if "api_key" not in response: response["api_key"] = config.TEAM_API_KEY @@ -140,6 +156,41 @@ def get_assets_from_page( sort_order: SortOrder = SortOrder.ASCENDING, api_key: Optional[str] = None, ) -> List[Model]: + """Retrieve a paginated list of models with specified filters. + + This function fetches a page of models from the aiXplain platform, applying + various filters such as function type, suppliers, languages, and ownership. + + Args: + query: Search query string to filter models. + page_number (int): Page number to retrieve (0-based). + page_size (int): Number of models per page. + function (Function): Function type to filter models by. + suppliers (Union[Supplier, List[Supplier]]): Single supplier or list of + suppliers to filter models by. + source_languages (Union[Language, List[Language]]): Source language(s) + supported by the models. + target_languages (Union[Language, List[Language]]): Target language(s) + for translation models. + is_finetunable (bool, optional): Filter for fine-tunable models. + Defaults to None. + ownership (Optional[Tuple[OwnershipType, List[OwnershipType]]], optional): + Filter by model ownership type. Defaults to None. 
+ sort_by (Optional[SortBy], optional): Field to sort results by. + Defaults to None. + sort_order (SortOrder, optional): Sort direction (ascending/descending). + Defaults to SortOrder.ASCENDING. + api_key (Optional[str], optional): API key for authentication. + Defaults to None, using the configured TEAM_API_KEY. + + Returns: + Tuple[List[Model], int]: A tuple containing: + - List of Model objects matching the filters + - Total number of models matching the filters + + Raises: + Exception: If the API request fails or returns an error. + """ try: url = urljoin(config.BACKEND_URL, "sdk/models/paginate") filter_params = {"q": query, "pageNumber": page_number, "pageSize": page_size} @@ -205,6 +256,25 @@ def get_assets_from_page( def get_model_from_ids(model_ids: List[str], api_key: Optional[str] = None) -> List[Model]: + """Retrieve multiple models by their IDs. + + This function fetches multiple models from the aiXplain platform in a single + request using their unique identifiers. + + Args: + model_ids (List[str]): List of model IDs to retrieve. + api_key (Optional[str], optional): API key for authentication. + Defaults to None, using the configured TEAM_API_KEY. + + Returns: + List[Model]: List of Model objects corresponding to the provided IDs. + Each model will be instantiated as the appropriate subclass based + on its function type. + + Raises: + Exception: If the API request fails or returns an error, including + cases where models are not found or access is denied. + """ from aixplain.factories.model_factory.utils import create_model_from_response resp = None diff --git a/aixplain/factories/pipeline_factory/__init__.py b/aixplain/factories/pipeline_factory/__init__.py index f2606ca2..0d062f03 100644 --- a/aixplain/factories/pipeline_factory/__init__.py +++ b/aixplain/factories/pipeline_factory/__init__.py @@ -37,24 +37,37 @@ class PipelineFactory: - """A static class for creating and exploring Pipeline Objects. 
+ """Factory class for creating, managing, and exploring pipeline objects. + + This class provides functionality for creating new pipelines, retrieving existing + pipelines, and managing pipeline configurations in the aiXplain platform. Attributes: - backend_url (str): The URL for the backend. + backend_url (str): Base URL for the aiXplain backend API. """ backend_url = config.BACKEND_URL @classmethod def get(cls, pipeline_id: Text, api_key: Optional[Text] = None) -> Pipeline: - """Create a 'Pipeline' object from pipeline id + """Retrieve a pipeline by its ID. + + This method fetches an existing pipeline from the aiXplain platform using + its unique identifier. Args: - pipeline_id (Text): Pipeline ID of required pipeline. - api_key (Optional[Text], optional): Pipeline API key. Defaults to None. + pipeline_id (Text): Unique identifier of the pipeline to retrieve. + api_key (Optional[Text], optional): API key for authentication. + Defaults to None, using the configured TEAM_API_KEY. Returns: - Pipeline: Created 'Pipeline' object + Pipeline: Retrieved pipeline object with its configuration and architecture. + + Raises: + Exception: If the pipeline cannot be retrieved, including cases where: + - Pipeline ID is invalid + - Network error occurs + - Authentication fails """ resp = None try: @@ -110,12 +123,21 @@ def create_asset_from_id(cls, pipeline_id: Text) -> Pipeline: @classmethod def get_assets_from_page(cls, page_number: int) -> List[Pipeline]: - """Get the list of pipelines from a given page + """Retrieve a paginated list of pipelines. + + This method fetches a page of pipelines from the aiXplain platform. + Each page contains up to 10 pipelines. Args: - page_number (int): Page from which pipelines are to be listed + page_number (int): Zero-based page number to retrieve. + Returns: - List[Pipeline]: List of pipelines based on given filters + List[Pipeline]: List of pipeline objects on the specified page. 
+ Returns an empty list if an error occurs or no pipelines are found. + + Note: + This method is primarily used internally by get_first_k_assets. + For more control over pipeline listing, use the list method instead. """ try: url = urljoin(cls.backend_url, f"sdk/pipelines/?pageNumber={page_number}") @@ -137,12 +159,21 @@ def get_assets_from_page(cls, page_number: int) -> List[Pipeline]: @classmethod def get_first_k_assets(cls, k: int) -> List[Pipeline]: - """Gets the first k given pipelines based on the provided task and language filters + """Retrieve the first K pipelines from the platform. + + This method fetches up to K pipelines by making multiple paginated requests + as needed (10 pipelines per page). Args: - k (int): Number of pipelines to get + k (int): Number of pipelines to retrieve. Must be positive. + Returns: - List[Pipeline]: List of pipelines based on given filters + List[Pipeline]: List of up to K pipeline objects. + Returns an empty list if an error occurs. + + Note: + For more control over pipeline listing, use the list method instead. + This method is maintained for backwards compatibility. """ try: pipeline_list = [] @@ -168,6 +199,41 @@ def list( page_size: int = 20, drafts_only: bool = False, ) -> Dict: + """List and filter pipelines with pagination support. + + This method provides comprehensive filtering and pagination capabilities + for retrieving pipelines from the aiXplain platform. + + Args: + query (Optional[Text], optional): Search query to filter pipelines by name + or description. Defaults to None. + functions (Optional[Union[Function, List[Function]]], optional): Filter by + function type(s). Defaults to None. + suppliers (Optional[Union[Supplier, List[Supplier]]], optional): Filter by + supplier(s). Defaults to None. + models (Optional[Union[Model, List[Model]]], optional): Filter by specific + model(s) used in pipelines. Defaults to None. 
+ input_data_types (Optional[Union[DataType, List[DataType]]], optional): + Filter by input data type(s). Defaults to None. + output_data_types (Optional[Union[DataType, List[DataType]]], optional): + Filter by output data type(s). Defaults to None. + page_number (int, optional): Zero-based page number. Defaults to 0. + page_size (int, optional): Number of items per page (1-100). + Defaults to 20. + drafts_only (bool, optional): If True, only return draft pipelines. + Defaults to False. + + Returns: + Dict: Response containing: + - results (List[Pipeline]): List of pipeline objects + - page_total (int): Total items in current page + - page_number (int): Current page number + - total (int): Total number of items across all pages + + Raises: + Exception: If the request fails or if page_size is invalid. + AssertionError: If page_size is not between 1 and 100. + """ url = urljoin(cls.backend_url, "sdk/pipelines/paginate") @@ -243,14 +309,18 @@ def list( @classmethod def init(cls, name: Text, api_key: Optional[Text] = None) -> Pipeline: - """Initialize a new Pipeline + """Initialize a new empty pipeline. + + This method creates a new pipeline instance with no nodes or links, + ready for configuration. Args: - name (Text): Pipeline Name - api_key (Optional[Text], optional): Team API Key to create the Pipeline. Defaults to None. + name (Text): Name of the pipeline. + api_key (Optional[Text], optional): API key for authentication. + Defaults to None, using the configured TEAM_API_KEY. Returns: - Pipeline: instance of the new pipeline + Pipeline: New pipeline instance with empty configuration. """ if api_key is None: api_key = config.TEAM_API_KEY @@ -270,18 +340,29 @@ def create( pipeline: Union[Text, Dict], api_key: Optional[Text] = None, ) -> Pipeline: - """Draft Pipeline Creation + """Create a new draft pipeline. 
- Args: - name (Text): Pipeline Name - pipeline (Union[Text, Dict]): Pipeline as a Python dictionary or in a JSON file - api_key (Optional[Text], optional): Team API Key to create the Pipeline. Defaults to None. + This method creates a new pipeline in draft status from a configuration + provided either as a Python dictionary or a JSON file. - Raises: - Exception: Currently just the creation of draft pipelines are supported + Args: + name (Text): Name of the pipeline. + pipeline (Union[Text, Dict]): Pipeline configuration either as: + - Dict: Python dictionary containing nodes and links + - Text: Path to a JSON file containing the configuration + api_key (Optional[Text], optional): API key for authentication. + Defaults to None, using the configured TEAM_API_KEY. Returns: - Pipeline: instance of the new pipeline + Pipeline: Created pipeline instance in draft status. + + Raises: + Exception: If: + - JSON file path is invalid + - File extension is not .json + - Pipeline creation request fails + - Pipeline configuration is invalid + AssertionError: If the pipeline file doesn't exist or isn't a JSON file. """ try: if isinstance(pipeline, str) is True: diff --git a/aixplain/factories/pipeline_factory/utils.py b/aixplain/factories/pipeline_factory/utils.py index b571b427..7c87304e 100644 --- a/aixplain/factories/pipeline_factory/utils.py +++ b/aixplain/factories/pipeline_factory/utils.py @@ -21,14 +21,32 @@ def build_from_response(response: Dict, load_architecture: bool = False) -> Pipeline: - """Converts response Json to 'Pipeline' object + """Convert API response into a Pipeline object. + + This function creates a Pipeline object from an API response, optionally loading + its full architecture including nodes and links. The architecture can include + various node types like Input, Output, BareAsset, BareMetric, Decision, Router, + Script, BareSegmentor, and BareReconstructor. 
Args: - response (Dict): Json from API - load_architecture (bool, optional): If True, the architecture will be loaded. Defaults to False. + response (Dict): API response containing pipeline information including: + - id: Pipeline identifier + - name: Pipeline name + - api_key: Optional API key + - status: Pipeline status (defaults to "draft") + - nodes: Optional list of node configurations + - links: Optional list of link configurations + load_architecture (bool, optional): Whether to load the full pipeline + architecture including nodes and links. Defaults to False. Returns: - Pipeline: Coverted 'Pipeline' object + Pipeline: Instantiated pipeline object. If load_architecture is True, + includes all configured nodes and links. If architecture loading fails, + returns a pipeline with empty nodes and links lists. + + Note: + When loading architecture, decision nodes with passthrough parameters are + processed first to ensure proper parameter linking. """ if "api_key" not in response: response["api_key"] = config.TEAM_API_KEY diff --git a/aixplain/factories/script_factory.py b/aixplain/factories/script_factory.py index 14835752..f17f3375 100644 --- a/aixplain/factories/script_factory.py +++ b/aixplain/factories/script_factory.py @@ -8,8 +8,26 @@ class ScriptFactory: + """A factory class for handling script file operations. + + This class provides functionality for uploading script files to the backend + and managing their metadata. + """ @classmethod def upload_script(cls, script_path: str) -> Tuple[str, str]: + """Uploads a script file to the backend and returns its ID and metadata. + + Args: + script_path (str): The file system path to the script file to be uploaded. + + Returns: + Tuple[str, str]: A tuple containing: + - file_id (str): The unique identifier assigned to the uploaded file. + - metadata (str): JSON string containing file metadata (name and size). + + Raises: + Exception: If the upload fails or the file cannot be accessed. 
+ """ try: url = f"{config.BACKEND_URL}/sdk/pipelines/script" headers = {"Authorization": f"Token {config.TEAM_API_KEY}"} diff --git a/aixplain/factories/team_agent_factory/__init__.py b/aixplain/factories/team_agent_factory/__init__.py index 16e9794c..bbdf7c25 100644 --- a/aixplain/factories/team_agent_factory/__init__.py +++ b/aixplain/factories/team_agent_factory/__init__.py @@ -38,6 +38,14 @@ class TeamAgentFactory: + """Factory class for creating and managing team agents. + + This class provides functionality for creating new team agents, retrieving existing + team agents, and managing team agent configurations in the aiXplain platform. + Team agents can be composed of multiple individual agents, LLMs, and inspectors + working together to accomplish complex tasks. + """ + @classmethod def create( cls, @@ -245,7 +253,26 @@ def create( @classmethod def create_from_dict(cls, dict: Dict) -> TeamAgent: - """Create a team agent from a dictionary.""" + """Create a team agent from a dictionary representation. + + This method instantiates a TeamAgent object from a dictionary containing + the agent's configuration. + + Args: + dict (Dict): Dictionary containing team agent configuration including: + - id: Team agent identifier + - name: Team agent name + - agents: List of agent configurations + - llm: Optional LLM configuration + - supervisor_llm: Optional supervisor LLM configuration + - mentalist_llm: Optional mentalist LLM configuration + + Returns: + TeamAgent: Instantiated team agent with validated configuration. + + Raises: + Exception: If validation fails or required fields are missing. 
+ """ team_agent = TeamAgent.from_dict(dict) team_agent.validate(raise_exception=True) team_agent.url = urljoin(config.BACKEND_URL, f"sdk/agent-communities/{team_agent.id}/run") @@ -253,7 +280,22 @@ def create_from_dict(cls, dict: Dict) -> TeamAgent: @classmethod def list(cls) -> Dict: - """List all agents available in the platform.""" + """List all team agents available in the platform. + + This method retrieves all team agents accessible to the current user, + using the configured API key. + + Returns: + Dict: Response containing: + - results (List[TeamAgent]): List of team agent objects + - page_total (int): Total items in current page + - page_number (int): Current page number (always 0) + - total (int): Total number of team agents + + Raises: + Exception: If the request fails or returns an error, including cases + where authentication fails or the service is unavailable. + """ url = urljoin(config.BACKEND_URL, "sdk/agent-communities") headers = {"x-api-key": config.TEAM_API_KEY, "Content-Type": "application/json"} @@ -285,7 +327,26 @@ def list(cls) -> Dict: @classmethod def get(cls, agent_id: Text, api_key: Optional[Text] = None) -> TeamAgent: - """Get agent by id.""" + """Retrieve a team agent by its ID. + + This method fetches a specific team agent from the platform using its + unique identifier. + + Args: + agent_id (Text): Unique identifier of the team agent to retrieve. + api_key (Optional[Text], optional): API key for authentication. + Defaults to None, using the configured TEAM_API_KEY. + + Returns: + TeamAgent: Retrieved team agent with its full configuration. 
+ + Raises: + Exception: If: + - Team agent ID is invalid + - Authentication fails + - Service is unavailable + - Other API errors occur + """ url = urljoin(config.BACKEND_URL, f"sdk/agent-communities/{agent_id}") api_key = api_key if api_key is not None else config.TEAM_API_KEY headers = {"x-api-key": api_key, "Content-Type": "application/json"} diff --git a/aixplain/factories/team_agent_factory/inspector_factory.py b/aixplain/factories/team_agent_factory/inspector_factory.py index 0d68d1cd..a6d38d4f 100644 --- a/aixplain/factories/team_agent_factory/inspector_factory.py +++ b/aixplain/factories/team_agent_factory/inspector_factory.py @@ -1,13 +1,21 @@ -"""Factory for inspectors. +"""Factory module for creating and configuring inspector agents. -Example usage: +This module provides functionality for creating inspector agents that can validate +and monitor team agent operations. Inspectors can be created from existing models +or using automatic configurations. -inspector = InspectorFactory.create_from_model( - name="my_inspector", - model_id="my_model", - model_config={"prompt": "Check if the data is safe to use."}, - policy=InspectorPolicy.ADAPTIVE, -) +Example: + Create an inspector from a model with adaptive policy:: + + inspector = InspectorFactory.create_from_model( + name="my_inspector", + model_id="my_model", + model_config={"prompt": "Check if the data is safe to use."}, + policy=InspectorPolicy.ADAPTIVE, + ) + +Note: + Currently only supports GUARDRAILS and TEXT_GENERATION models as inspectors. """ import logging @@ -27,7 +35,12 @@ class InspectorFactory: - """A class for creating an Inspector instance.""" + """Factory class for creating and configuring inspector agents. + + This class provides methods for creating inspector agents either from existing + models or using automatic configurations. Inspectors are used to validate and + monitor team agent operations, providing feedback and enforcing policies. 
+ """ @classmethod def create_from_model( @@ -39,14 +52,31 @@ def create_from_model( ) -> Inspector: """Create a new inspector agent from an onboarded model. + This method creates an inspector agent using an existing model that has been + onboarded to the platform. The model must be of a supported function type + (currently GUARDRAILS or TEXT_GENERATION). + Args: - name: Name of the inspector agent. - model: Model or model ID to use for inspector. - model_config: Configuration for the inspector. Defaults to None. - policy: Action to take upon negative feedback (WARN/ABORT/ADAPTIVE). Defaults to ADAPTIVE. + name (Text): Name of the inspector agent. + model (Union[Text, Model]): Either a Model instance or model ID string + to use for the inspector. + model_config (Optional[Dict], optional): Configuration parameters for + the inspector model (e.g., prompts, thresholds). Defaults to None. + policy (InspectorPolicy, optional): Action to take upon negative feedback: + - WARN: Log warning but continue execution + - ABORT: Stop execution on negative feedback + - ADAPTIVE: Dynamically decide based on context + Defaults to InspectorPolicy.ADAPTIVE. Returns: - Inspector: The created inspector + Inspector: Created and configured inspector agent. + + Raises: + ValueError: If: + - Model ID is invalid + - Model is not onboarded + - Model function is not supported + Exception: If model retrieval fails """ # fetch model if model ID is provided if isinstance(model, Text): @@ -96,14 +126,25 @@ def create_auto( name: Optional[Text] = None, policy: InspectorPolicy = InspectorPolicy.ADAPTIVE, ) -> Inspector: - """Create a new inspector agent from an automatically configured inspector. + """Create a new inspector agent using automatic configuration. + + This method creates an inspector agent using a pre-configured InspectorAuto + instance, which provides automatic inspection capabilities without requiring + a specific model. Args: - auto: The automatically configured inspector. 
- policy: Action to take upon negative feedback (WARN/ABORT/ADAPTIVE). Defaults to ADAPTIVE. + auto (InspectorAuto): Pre-configured automatic inspector instance. + name (Optional[Text], optional): Name for the inspector. If not provided, + uses the name from the auto configuration. Defaults to None. + policy (InspectorPolicy, optional): Action to take upon negative feedback: + - WARN: Log warning but continue execution + - ABORT: Stop execution on negative feedback + - ADAPTIVE: Dynamically decide based on context + Defaults to InspectorPolicy.ADAPTIVE. Returns: - Inspector: The created inspector. + Inspector: Created and configured inspector agent using automatic + inspection capabilities. """ return Inspector( name=name or auto.get_name(), diff --git a/aixplain/factories/team_agent_factory/utils.py b/aixplain/factories/team_agent_factory/utils.py index 16b65641..e3ecdf64 100644 --- a/aixplain/factories/team_agent_factory/utils.py +++ b/aixplain/factories/team_agent_factory/utils.py @@ -18,7 +18,40 @@ def build_team_agent(payload: Dict, agents: List[Agent] = None, api_key: Text = config.TEAM_API_KEY) -> TeamAgent: - """Instantiate a new team agent in the platform.""" + """Build a TeamAgent instance from configuration payload. + + This function creates a TeamAgent instance from a configuration payload, + handling the setup of agents, LLMs, inspectors, and task dependencies. 
+ + Args: + payload (Dict): Configuration dictionary containing: + - id: Optional team agent ID + - name: Team agent name + - agents: List of agent configurations + - description: Optional description + - role: Optional instructions + - teamId: Optional supplier information + - version: Optional version + - cost: Optional cost information + - llmId: LLM model ID (defaults to GPT-4) + - plannerId: Optional planner model ID + - inspectors: Optional list of inspector configurations + - inspectorTargets: Optional list of inspection targets + - status: Team agent status + - tools: Optional list of tool configurations + agents (List[Agent], optional): Pre-instantiated agent objects. If not + provided, agents will be instantiated from IDs in the payload. + Defaults to None. + api_key (Text, optional): API key for authentication. Defaults to + config.TEAM_API_KEY. + + Returns: + TeamAgent: Configured team agent instance with all components initialized. + + Raises: + Exception: If a task dependency referenced in an agent's configuration + cannot be found. + """ agents_dict = payload["agents"] payload_agents = agents if payload_agents is None: diff --git a/aixplain/factories/tool_factory.py b/aixplain/factories/tool_factory.py index 8a9e9160..35d8d38e 100644 --- a/aixplain/factories/tool_factory.py +++ b/aixplain/factories/tool_factory.py @@ -15,6 +15,19 @@ class ToolFactory(ModelGetterMixin, ModelListMixin): + """A factory class for creating and managing various types of tools including indexes, scripts, and connections. + + This class provides functionality to create and manage different types of tools: + - Script models (utility models) + - Search collections (index models) + - Connectors (integration models) + + The factory inherits from ModelGetterMixin and ModelListMixin to provide model retrieval + and listing capabilities. + + Attributes: + backend_url: The URL endpoint for the backend API. 
+ """ backend_url = config.BACKEND_URL @@ -26,7 +39,23 @@ def recreate( params: Optional[Union[BaseUtilityModelParams, BaseIndexParams, BaseAuthenticationParams]] = None, data: Optional[Dict] = None, **kwargs, - ) -> Model: + ) -> Model: + """Recreates a tool based on an existing tool's configuration. + + This method creates a new tool instance using the configuration of an existing tool. + It's useful for creating copies or variations of existing tools. + + Args: + integration (Optional[Union[Text, Model]], optional): The integration model or its ID. Defaults to None. + tool (Optional[Union[Text, Model]], optional): The existing tool model or its ID to recreate from. Defaults to None. + params (Optional[Union[BaseUtilityModelParams, BaseIndexParams, BaseAuthenticationParams]], optional): + Parameters for the new tool. Defaults to None. + data (Optional[Dict], optional): Additional data for tool creation. Defaults to None. + **kwargs: Additional keyword arguments passed to the tool creation process. + + Returns: + Model: The newly created tool model. + """ if data is None: data = {} data["assetId"] = tool.id if isinstance(tool, Model) else tool diff --git a/aixplain/factories/wallet_factory.py b/aixplain/factories/wallet_factory.py index 2de28ec4..d373cfd7 100644 --- a/aixplain/factories/wallet_factory.py +++ b/aixplain/factories/wallet_factory.py @@ -6,11 +6,32 @@ class WalletFactory: + """A factory class for retrieving wallet information. + + This class provides functionality to fetch wallet details including total + and reserved balance information from the backend API. + + Attributes: + backend_url: The URL endpoint for the backend API. + """ backend_url = config.BACKEND_URL @classmethod def get(cls, api_key: Text = config.TEAM_API_KEY) -> Wallet: - """Get wallet information""" + """Retrieves the current wallet information from the backend. + + This method fetches the wallet details including total balance and reserved balance + using the provided API key. 
+ + Args: + api_key (Text, optional): The API key for authentication. Defaults to config.TEAM_API_KEY. + + Returns: + Wallet: A Wallet object containing the total and reserved balance information. + + Raises: + Exception: If the wallet information cannot be retrieved from the backend. + """ try: resp = None url = f"{cls.backend_url}/sdk/billing/wallet" diff --git a/aixplain/modules/agent/__init__.py b/aixplain/modules/agent/__init__.py index 794b1d14..362e72f3 100644 --- a/aixplain/modules/agent/__init__.py +++ b/aixplain/modules/agent/__init__.py @@ -45,20 +45,30 @@ class Agent(Model, DeployableMixin[Tool]): - """Advanced AI system capable of performing tasks by leveraging specialized software tools and resources from aiXplain marketplace. + """An advanced AI system that performs tasks using specialized tools from the aiXplain marketplace. + + This class represents an AI agent that can understand natural language instructions, + use various tools and models, and execute complex tasks. It combines a large language + model (LLM) with specialized tools to provide comprehensive task-solving capabilities. Attributes: - id (Text): ID of the Agent - name (Text): Name of the Agent - tools (List[Union[Tool, Model]]): List of tools that the Agent uses. - description (Text, optional): description of the Agent. Defaults to "". - instructions (Text): instructions of the Agent. - llm_id (Text): large language model. Defaults to GPT-4o (6646261c6eb563165658bbb1). - supplier (Text): Supplier of the Agent. - version (Text): Version of the Agent. - backend_url (str): URL of the backend. - api_key (str): The TEAM API key used for authentication. - cost (Dict, optional): model price. Defaults to None. + id (Text): ID of the Agent. + name (Text): Name of the Agent. + tools (List[Union[Tool, Model]]): Collection of tools and models the Agent can use. + description (Text, optional): Detailed description of the Agent's capabilities. + Defaults to "". 
+ instructions (Text): System instructions/prompt defining the Agent's behavior. + llm_id (Text): ID of the large language model. Defaults to GPT-4o + (6646261c6eb563165658bbb1). + llm (Optional[LLM]): The LLM instance used by the Agent. + supplier (Text): The provider/creator of the Agent. + version (Text): Version identifier of the Agent. + status (AssetStatus): Current status of the Agent (DRAFT or ONBOARDED). + tasks (List[AgentTask]): List of tasks the Agent can perform. + backend_url (str): URL endpoint for the backend API. + api_key (str): Authentication key for API access. + cost (Dict, optional): Pricing information for using the Agent. Defaults to None. + is_valid (bool): Whether the Agent's configuration is valid. """ is_valid: bool @@ -80,21 +90,35 @@ def __init__( tasks: List[AgentTask] = [], **additional_info, ) -> None: - """Create an Agent with the necessary information. + """Initialize a new Agent instance. Args: - id (Text): ID of the Agent - name (Text): Name of the Agent - description (Text): description of the Agent. - instructions (Text): role of the Agent. - tools (List[Union[Tool, Model]]): List of tools that the Agent uses. - llm_id (Text, optional): large language model ID. Defaults to GPT-4o (6646261c6eb563165658bbb1). - llm (LLM, optional): large language model object. Defaults to None. - supplier (Text): Supplier of the Agent. - version (Text): Version of the Agent. - backend_url (str): URL of the backend. - api_key (str): The TEAM API key used for authentication. - cost (Dict, optional): model price. Defaults to None. + id (Text): ID of the Agent. + name (Text): Name of the Agent. + description (Text): Detailed description of the Agent's capabilities. + instructions (Optional[Text], optional): System instructions/prompt defining + the Agent's behavior. Defaults to None. + tools (List[Union[Tool, Model]], optional): Collection of tools and models + the Agent can use. Defaults to empty list. 
+ llm_id (Text, optional): ID of the large language model. Defaults to GPT-4o + (6646261c6eb563165658bbb1). + llm (Optional[LLM], optional): The LLM instance to use. If provided, takes + precedence over llm_id. Defaults to None. + api_key (Optional[Text], optional): Authentication key for API access. + Defaults to config.TEAM_API_KEY. + supplier (Union[Dict, Text, Supplier, int], optional): The provider/creator + of the Agent. Defaults to "aiXplain". + version (Optional[Text], optional): Version identifier. Defaults to None. + cost (Optional[Dict], optional): Pricing information. Defaults to None. + status (AssetStatus, optional): Current status of the Agent. + Defaults to AssetStatus.DRAFT. + tasks (List[AgentTask], optional): List of tasks the Agent can perform. + Defaults to empty list. + **additional_info: Additional configuration parameters. + + Note: + The Agent must be validated before use. Invalid configurations will be + flagged and may prevent the Agent from running. """ super().__init__(id, name, description, api_key, supplier, version, cost=cost) self.instructions = instructions @@ -114,7 +138,17 @@ def __init__( self.is_valid = True def _validate(self) -> None: - """Validate the Agent.""" + """Perform internal validation of the Agent's configuration. + + This method checks: + 1. Name contains only valid characters + 2. LLM is a text generation model + 3. Tool names are unique + 4. No nested Agents are used + + Raises: + AssertionError: If any validation check fails. + """ from aixplain.utils.llm_utils import get_llm_instance # validate name @@ -144,7 +178,21 @@ def _validate(self) -> None: ) def validate(self, raise_exception: bool = False) -> bool: - """Validate the Agent.""" + """Validate the Agent's configuration and mark its validity status. + + This method runs all validation checks and updates the is_valid flag. + If validation fails, it can either raise an exception or log warnings. 
+ + Args: + raise_exception (bool, optional): Whether to raise exceptions on validation + failures. If False, failures are logged as warnings. Defaults to False. + + Returns: + bool: True if validation passed, False otherwise. + + Raises: + Exception: If validation fails and raise_exception is True. + """ try: self._validate() self.is_valid = True @@ -464,7 +512,18 @@ def from_dict(cls, data: Dict) -> "Agent": ) def delete(self) -> None: - """Delete Agent service""" + """Delete this Agent from the aiXplain platform. + + This method attempts to delete the Agent. The operation will fail if the + Agent is being used by any team agents. + + Raises: + Exception: If deletion fails, with detailed error messages for different + failure scenarios: + - Agent is in use by accessible team agents (lists team agent IDs) + - Agent is in use by inaccessible team agents + - Other deletion errors (with HTTP status code) + """ try: url = urljoin(config.BACKEND_URL, f"sdk/agents/{self.id}") headers = { @@ -516,7 +575,19 @@ def delete(self) -> None: raise Exception(message) def update(self) -> None: - """Update agent.""" + """Update the Agent's configuration on the aiXplain platform. + + This method validates and updates the Agent's configuration. It is deprecated + in favor of the save() method. + + Raises: + Exception: If validation fails or if there are errors during the update. + DeprecationWarning: This method is deprecated, use save() instead. + + Note: + This method is deprecated and will be removed in a future version. + Please use save() instead. + """ import warnings import inspect @@ -551,8 +622,20 @@ def update(self) -> None: raise Exception(error_msg) def save(self) -> None: - """Save the Agent.""" + """Save the Agent's current configuration to the aiXplain platform. + + This method validates and saves any changes made to the Agent's configuration. + It is the preferred method for updating an Agent's settings. 
+ + Raises: + Exception: If validation fails or if there are errors during the save operation. + """ self.update() - def __repr__(self): + def __repr__(self) -> str: + """Return a string representation of the Agent. + + Returns: + str: A string in the format "Agent: (id=)". + """ return f"Agent: {self.name} (id={self.id})" diff --git a/aixplain/modules/agent/agent_response.py b/aixplain/modules/agent/agent_response.py index 73c5e839..fc35e072 100644 --- a/aixplain/modules/agent/agent_response.py +++ b/aixplain/modules/agent/agent_response.py @@ -5,6 +5,23 @@ class AgentResponse(ModelResponse): + """A response object for agent execution results. + + This class extends ModelResponse to handle agent-specific response data, + including intermediate steps and execution statistics. It provides dictionary-like + access to response data and serialization capabilities. + + Attributes: + status (ResponseStatus): The status of the agent execution. + data (Optional[AgentResponseData]): Structured data from the agent execution. + details (Optional[Union[Dict, List]]): Additional execution details. + completed (bool): Whether the execution has completed. + error_message (Text): Error message if execution failed. + used_credits (float): Number of credits used for execution. + run_time (float): Total execution time in seconds. + usage (Optional[Dict]): Resource usage information. + url (Optional[Text]): URL for asynchronous result polling. + """ def __init__( self, status: ResponseStatus = ResponseStatus.FAILED, @@ -18,6 +35,29 @@ def __init__( url: Optional[Text] = None, **kwargs, ): + """Initialize a new AgentResponse instance. + + Args: + status (ResponseStatus, optional): The status of the agent execution. + Defaults to ResponseStatus.FAILED. + data (Optional[AgentResponseData], optional): Structured data from the + agent execution. Defaults to None. + details (Optional[Union[Dict, List]], optional): Additional execution + details. Defaults to {}. 
+ completed (bool, optional): Whether the execution has completed. + Defaults to False. + error_message (Text, optional): Error message if execution failed. + Defaults to "". + used_credits (float, optional): Number of credits used for execution. + Defaults to 0.0. + run_time (float, optional): Total execution time in seconds. + Defaults to 0.0. + usage (Optional[Dict], optional): Resource usage information. + Defaults to None. + url (Optional[Text], optional): URL for asynchronous result polling. + Defaults to None. + **kwargs: Additional keyword arguments passed to ModelResponse. + """ super().__init__( status=status, @@ -34,11 +74,33 @@ def __init__( self.data = data or AgentResponseData() def __getitem__(self, key: Text) -> Any: + """Get a response attribute using dictionary-style access. + + Overrides the parent class's __getitem__ to handle AgentResponseData + serialization when accessing the 'data' key. + + Args: + key (Text): The name of the attribute to get. + + Returns: + Any: The value of the attribute. For 'data' key, returns the + serialized dictionary form. + """ if key == "data": return self.data.to_dict() return super().__getitem__(key) def __setitem__(self, key: Text, value: Any) -> None: + """Set a response attribute using dictionary-style access. + + Overrides the parent class's __setitem__ to handle AgentResponseData + deserialization when setting the 'data' key. + + Args: + key (Text): The name of the attribute to set. + value (Any): The value to assign. For 'data' key, can be either a + dictionary or AgentResponseData instance. + """ if key == "data" and isinstance(value, Dict): self.data = AgentResponseData.from_dict(value) elif key == "data" and isinstance(value, AgentResponseData): @@ -47,10 +109,25 @@ def __setitem__(self, key: Text, value: Any) -> None: super().__setitem__(key, value) def to_dict(self) -> Dict[Text, Any]: + """Convert the response to a dictionary representation. 
+ + Overrides the parent class's to_dict to handle AgentResponseData + serialization in the output dictionary. + + Returns: + Dict[Text, Any]: A dictionary containing all response data, with the + 'data' field containing the serialized AgentResponseData. + """ base_dict = super().to_dict() base_dict["data"] = self.data.to_dict() return base_dict def __repr__(self) -> str: + """Return a string representation of the response. + + Returns: + str: A string showing all attributes and their values in a readable format, + with the class name changed from ModelResponse to AgentResponse. + """ fields = super().__repr__()[len("ModelResponse(") : -1] return f"AgentResponse({fields})" diff --git a/aixplain/modules/agent/agent_response_data.py b/aixplain/modules/agent/agent_response_data.py index 7a93b3aa..4de10d12 100644 --- a/aixplain/modules/agent/agent_response_data.py +++ b/aixplain/modules/agent/agent_response_data.py @@ -2,6 +2,19 @@ class AgentResponseData: + """A container for agent execution response data. + + This class encapsulates the input, output, and execution details of an agent's + response, including intermediate steps and execution statistics. + + Attributes: + input (Optional[Any]): The input provided to the agent. + output (Optional[Any]): The final output from the agent. + session_id (str): Identifier for the conversation session. + intermediate_steps (List[Any]): List of steps taken during execution. + execution_stats (Optional[Dict[str, Any]]): Statistics about the execution. + critiques (str): Any critiques or feedback about the execution. + """ def __init__( self, input: Optional[Any] = None, @@ -11,6 +24,22 @@ def __init__( execution_stats: Optional[Dict[str, Any]] = None, critiques: Optional[str] = None, ): + """Initialize a new AgentResponseData instance. + + Args: + input (Optional[Any], optional): The input provided to the agent. + Defaults to None. + output (Optional[Any], optional): The final output from the agent. + Defaults to None. 
+ session_id (str, optional): Identifier for the conversation session. + Defaults to "". + intermediate_steps (Optional[List[Any]], optional): List of steps taken + during execution. Defaults to None. + execution_stats (Optional[Dict[str, Any]], optional): Statistics about + the execution. Defaults to None. + critiques (Optional[str], optional): Any critiques or feedback about + the execution. Defaults to None. + """ self.input = input self.output = output self.session_id = session_id @@ -20,6 +49,20 @@ def __init__( @classmethod def from_dict(cls, data: Dict[str, Any]) -> "AgentResponseData": + """Create an AgentResponseData instance from a dictionary. + + Args: + data (Dict[str, Any]): Dictionary containing response data with keys: + - input: The input provided to the agent + - output: The final output from the agent + - session_id: Identifier for the conversation session + - intermediate_steps: List of steps taken during execution + - executionStats: Statistics about the execution + - critiques: Any critiques or feedback + + Returns: + AgentResponseData: A new instance populated with the dictionary data. + """ return cls( input=data.get("input"), output=data.get("output"), @@ -30,6 +73,18 @@ def from_dict(cls, data: Dict[str, Any]) -> "AgentResponseData": ) def to_dict(self) -> Dict[str, Any]: + """Convert the response data to a dictionary representation. 
+ + Returns: + Dict[str, Any]: A dictionary containing all response data with keys: + - input: The input provided to the agent + - output: The final output from the agent + - session_id: Identifier for the conversation session + - intermediate_steps: List of steps taken during execution + - executionStats: Statistics about the execution + - execution_stats: Alias for executionStats + - critiques: Any critiques or feedback + """ return { "input": self.input, "output": self.output, @@ -40,16 +95,38 @@ def to_dict(self) -> Dict[str, Any]: "critiques": self.critiques, } - def __getitem__(self, key): + def __getitem__(self, key: str) -> Any: + """Get an attribute value using dictionary-style access. + + Args: + key (str): The name of the attribute to get. + + Returns: + Any: The value of the attribute, or None if not found. + """ return getattr(self, key, None) - def __setitem__(self, key, value): + def __setitem__(self, key: str, value: Any) -> None: + """Set an attribute value using dictionary-style access. + + Args: + key (str): The name of the attribute to set. + value (Any): The value to assign to the attribute. + + Raises: + KeyError: If the key is not a valid attribute of the class. + """ if hasattr(self, key): setattr(self, key, value) else: raise KeyError(f"{key} is not a valid attribute of {self.__class__.__name__}") def __repr__(self) -> str: + """Return a string representation of the response data. + + Returns: + str: A string showing all attributes and their values in a readable format. + """ return ( f"{self.__class__.__name__}(" f"input={self.input}, " @@ -61,6 +138,14 @@ def __repr__(self) -> str: ) def __contains__(self, key: Text) -> bool: + """Check if an attribute exists using 'in' operator. + + Args: + key (Text): The name of the attribute to check. + + Returns: + bool: True if the attribute exists and is accessible, False otherwise. 
+ """ try: self[key] return True diff --git a/aixplain/modules/agent/agent_task.py b/aixplain/modules/agent/agent_task.py index 593a0d0f..469beba1 100644 --- a/aixplain/modules/agent/agent_task.py +++ b/aixplain/modules/agent/agent_task.py @@ -2,6 +2,18 @@ class AgentTask: + """A task definition for an AI agent to execute. + + This class represents a task that can be assigned to an agent, including its + description, expected output, and any dependencies on other tasks. + + Attributes: + name (Text): The unique identifier/name of the task. + description (Text): Detailed description of what the task should accomplish. + expected_output (Text): Description of the expected output format or content. + dependencies (Optional[List[Union[Text, AgentTask]]]): List of tasks or task + names that must be completed before this task. Defaults to None. + """ def __init__( self, name: Text, @@ -9,12 +21,34 @@ def __init__( expected_output: Text, dependencies: Optional[List[Union[Text, "AgentTask"]]] = None, ): + """Initialize a new AgentTask instance. + + Args: + name (Text): The unique identifier/name of the task. + description (Text): Detailed description of what the task should accomplish. + expected_output (Text): Description of the expected output format or content. + dependencies (Optional[List[Union[Text, AgentTask]]], optional): List of + tasks or task names that must be completed before this task. + Defaults to None. + """ self.name = name self.description = description self.expected_output = expected_output self.dependencies = dependencies - def to_dict(self): + def to_dict(self) -> dict: + """Convert the task to a dictionary representation. + + This method serializes the task data, converting any AgentTask dependencies + to their name strings. 
+ + Returns: + dict: A dictionary containing the task data with keys: + - name: The task name + - description: The task description + - expectedOutput: The expected output description + - dependencies: List of dependency names or None + """ agent_task_dict = { "name": self.name, "description": self.description, diff --git a/aixplain/modules/agent/model_with_params.py b/aixplain/modules/agent/model_with_params.py index 431f8cd0..0757f586 100644 --- a/aixplain/modules/agent/model_with_params.py +++ b/aixplain/modules/agent/model_with_params.py @@ -41,11 +41,41 @@ class ModelWithParams(BaseModel, ABC): @field_validator("model_id") def validate_model_id(cls, v: Text) -> Text: + """Validate the model_id field. + + This validator ensures that the model_id is not empty or whitespace-only. + + Args: + cls: The class (automatically provided by pydantic). + v (Text): The value to validate. + + Returns: + Text: The validated model ID. + + Raises: + ValueError: If the model ID is empty or contains only whitespace. + """ if not v or not v.strip(): raise ValueError("Model ID is required") return v def __new__(cls, *args, **kwargs): + """Create a new instance of a ModelWithParams subclass. + + This method prevents direct instantiation of the abstract base class while + allowing subclasses to be instantiated normally. + + Args: + cls: The class being instantiated. + *args: Positional arguments for instance creation. + **kwargs: Keyword arguments for instance creation. + + Returns: + ModelWithParams: A new instance of a ModelWithParams subclass. + + Raises: + TypeError: If attempting to instantiate ModelWithParams directly. 
+ """ if cls is ModelWithParams: raise TypeError("ModelWithParams is an abstract base class and cannot be instantiated directly") return super().__new__(cls) diff --git a/aixplain/modules/agent/output_format.py b/aixplain/modules/agent/output_format.py index 1e4984ad..5fbaedf0 100644 --- a/aixplain/modules/agent/output_format.py +++ b/aixplain/modules/agent/output_format.py @@ -26,6 +26,16 @@ class OutputFormat(Text, Enum): + """Enum representing different output formats for AI agent responses. + + This enum defines the possible output formats that can be used by AI agents. + Each format is represented by a string constant. + + Attributes: + MARKDOWN (Text): Markdown format for formatted text output. + TEXT (Text): Plain text output. + JSON (Text): JSON format for structured data output. + """ MARKDOWN = "markdown" TEXT = "text" JSON = "json" diff --git a/aixplain/modules/agent/tool/__init__.py b/aixplain/modules/agent/tool/__init__.py index 4b0ed0fc..b83c9704 100644 --- a/aixplain/modules/agent/tool/__init__.py +++ b/aixplain/modules/agent/tool/__init__.py @@ -44,13 +44,15 @@ def __init__( status: Optional[AssetStatus] = AssetStatus.DRAFT, **additional_info, ) -> None: - """Specialized software or resource designed to assist the AI in executing specific tasks or functions based on user commands. + """Initialize a new Tool instance. Args: - name (Text): name of the tool - description (Text): descriptiion of the tool - version (Text): version of the tool - api_key (Text): api key of the tool. Defaults to config.TEAM_API_KEY. + name (Text): The name of the tool. + description (Text): A description of the tool's functionality. + version (Optional[Text], optional): The version of the tool. Defaults to None. + api_key (Optional[Text], optional): The API key for authentication. Defaults to config.TEAM_API_KEY. + status (Optional[AssetStatus], optional): The current status of the tool. Defaults to AssetStatus.DRAFT. 
+ **additional_info: Additional keyword arguments for tool configuration. """ self.name = name self.description = description @@ -60,11 +62,34 @@ def __init__( self.status = status def to_dict(self): - """Converts the tool to a dictionary.""" + """Converts the tool instance to a dictionary representation. + + Returns: + dict: A dictionary containing the tool's attributes and configuration. + + Raises: + NotImplementedError: This is an abstract method that must be implemented by subclasses. + """ raise NotImplementedError def validate(self): + """Validates the tool's configuration and settings. + + This method should check if all required attributes are properly set and + if the tool's configuration is valid. + + Raises: + NotImplementedError: This is an abstract method that must be implemented by subclasses. + """ raise NotImplementedError def deploy(self) -> None: + """Deploys the tool to make it available for use. + + This method should handle any necessary setup or deployment steps + required to make the tool operational. + + Raises: + NotImplementedError: This is an abstract method that must be implemented by subclasses. + """ raise NotImplementedError diff --git a/aixplain/modules/agent/tool/custom_python_code_tool.py b/aixplain/modules/agent/tool/custom_python_code_tool.py index 05715b2a..ef587c2d 100644 --- a/aixplain/modules/agent/tool/custom_python_code_tool.py +++ b/aixplain/modules/agent/tool/custom_python_code_tool.py @@ -29,12 +29,32 @@ class CustomPythonCodeTool(Tool): - """Custom Python Code Tool""" + """A tool for executing custom Python code in the aiXplain platform. + + This tool allows users to define and execute custom Python functions or code snippets + as part of their workflow. It supports both direct code input and callable functions. + + Attributes: + code (Union[Text, Callable]): The Python code to execute, either as a string or callable. + id (str): The identifier for the code interpreter model. 
+ status (AssetStatus): The current status of the tool (DRAFT or ONBOARDED). + """ def __init__( self, code: Union[Text, Callable], description: Text = "", name: Optional[Text] = None, **additional_info ) -> None: - """Custom Python Code Tool""" + """Initialize a new CustomPythonCodeTool instance. + + Args: + code (Union[Text, Callable]): The Python code to execute, either as a string or callable function. + description (Text, optional): Description of what the code does. Defaults to "". + name (Optional[Text], optional): Name of the tool. Defaults to None. + **additional_info: Additional keyword arguments for tool configuration. + + Note: + If description or name are not provided, they may be automatically extracted + from the code's docstring if available. + """ super().__init__(name=name or "", description=description, **additional_info) self.code = code self.status = AssetStatus.ONBOARDED # TODO: change to DRAFT when we have a way to onboard the tool @@ -43,6 +63,17 @@ def __init__( self.validate() def to_dict(self): + """Convert the tool instance to a dictionary representation. + + Returns: + dict: A dictionary containing the tool's configuration with keys: + - id: The tool's identifier + - name: The tool's name + - description: The tool's description + - type: Always "utility" + - utility: Always "custom_python_code" + - utilityCode: The Python code to execute + """ return { "id": self.id, "name": self.name, @@ -53,6 +84,17 @@ def to_dict(self): } def validate(self): + """Validate the tool's configuration and code. + + This method performs several checks: + 1. Parses and validates the Python code if it's not an S3 URL + 2. Extracts description and name from code's docstring if not provided + 3. Ensures all required fields (description, code, name) are non-empty + 4. Verifies the tool status is either DRAFT or ONBOARDED + + Raises: + AssertionError: If any validation check fails, with a descriptive error message. 
+ """ from aixplain.modules.model.utils import parse_code_decorated if not str(self.code).startswith("s3://"): @@ -79,7 +121,17 @@ def validate(self): ], "Custom Python Code Tool Error: Status must be DRAFT or ONBOARDED" def __repr__(self) -> Text: + """Return a string representation of the tool. + + Returns: + Text: A string in the format "CustomPythonCodeTool(name=)". + """ return f"CustomPythonCodeTool(name={self.name})" def deploy(self): + """Deploy the custom Python code tool. + + This is a placeholder method as custom Python code tools are automatically + deployed when created. + """ pass diff --git a/aixplain/modules/agent/tool/model_tool.py b/aixplain/modules/agent/tool/model_tool.py index 2f662ff6..da455188 100644 --- a/aixplain/modules/agent/tool/model_tool.py +++ b/aixplain/modules/agent/tool/model_tool.py @@ -52,12 +52,19 @@ def set_tool_name(function: Function, supplier: Supplier = None, model: Model = class ModelTool(Tool): - """Specialized software or resource designed to assist the AI in executing specific tasks or functions based on user commands. + """A tool that wraps AI models to execute specific tasks or functions based on user commands. + + This class provides a standardized interface for working with various AI models, + allowing them to be used as tools in the aiXplain platform. It handles model + configuration, validation, and parameter management. Attributes: - function (Optional[Function]): task that the tool performs. - supplier (Optional[Supplier]): Preferred supplier to perform the task. - model (Optional[Text]): Model function. + function (Optional[Function]): The task that the tool performs. + supplier (Optional[Supplier]): The preferred supplier to perform the task. + model (Optional[Union[Text, Model]]): The model ID or Model instance. + model_object (Optional[Model]): The actual Model instance for parameter access. + parameters (Optional[Dict]): Configuration parameters for the model. 
+ status (AssetStatus): The current status of the tool. """ def __init__( @@ -70,15 +77,24 @@ def __init__( parameters: Optional[Dict] = None, **additional_info, ) -> None: - """Specialized software or resource designed to assist the AI in executing specific tasks or functions based on user commands. + """Initialize a new ModelTool instance. Args: - function (Optional[Union[Function, Text]]): task that the tool performs. Defaults to None. - supplier (Optional[Union[Dict, Supplier]]): Preferred supplier to perform the task. Defaults to None. Defaults to None. - model (Optional[Union[Text, Model]]): Model function. Defaults to None. - name (Optional[Text]): Name of the tool. Defaults to None. - description (Text): Description of the tool. Defaults to "". - parameters (Optional[Dict]): Parameters of the tool. Defaults to None. + function (Optional[Union[Function, Text]], optional): The task that the tool performs. Can be a Function enum + or a string that will be converted to a Function. Defaults to None. + supplier (Optional[Union[Dict, Supplier]], optional): The preferred supplier to perform the task. + Can be a Supplier enum or a dictionary with supplier information. Defaults to None. + model (Optional[Union[Text, Model]], optional): The model to use, either as a Model instance + or a model ID string. Defaults to None. + name (Optional[Text], optional): The name of the tool. If not provided, will be generated + from function, supplier, and model. Defaults to None. + description (Text, optional): A description of the tool's functionality. If not provided, + will be taken from model or function description. Defaults to "". + parameters (Optional[Dict], optional): Configuration parameters for the model. Defaults to None. + **additional_info: Additional keyword arguments for tool configuration. + + Raises: + Exception: If the specified model doesn't exist or is inaccessible. 
""" name = name or "" super().__init__(name=name, description=description, **additional_info) @@ -107,7 +123,23 @@ def __init__( self.validate() def to_dict(self) -> Dict: - """Converts the tool to a dictionary.""" + """Convert the tool instance to a dictionary representation. + + This method handles the conversion of complex attributes like supplier and model + into their serializable forms. + + Returns: + Dict: A dictionary containing the tool's configuration with keys: + - function: The function value or None + - type: Always "model" + - name: The tool's name + - description: The tool's description + - supplier: The supplier code or None + - version: The tool's version or None + - assetId: The model's ID + - parameters: The tool's parameters + - status: The tool's status + """ supplier = self.supplier if supplier is not None: if isinstance(supplier, dict): @@ -183,6 +215,12 @@ def validate(self) -> None: def get_parameters(self) -> Dict: + """Get the tool's parameters, either from explicit settings or the model object. + + Returns: + Dict: The tool's parameters. If no explicit parameters were set and a model + object exists with model_params, returns those parameters as a list. + """ # If parameters were not explicitly provided, get them from the model if ( self.parameters is None @@ -194,6 +232,18 @@ def get_parameters(self) -> Dict: return self.parameters def _get_model(self, model_id: Text = None): + """Retrieve a Model instance by its ID. + + Args: + model_id (Text, optional): The ID of the model to retrieve. If not provided, + uses the tool's model ID. Defaults to None. + + Returns: + Model: The retrieved Model instance. + + Raises: + Exception: If the model cannot be retrieved or accessed. 
+ """ from aixplain.factories.model_factory import ModelFactory model_id = model_id or self.model @@ -255,9 +305,20 @@ def validate_parameters(self, received_parameters: Optional[List[Dict]] = None) return received_parameters def __repr__(self) -> Text: + """Return a string representation of the tool. + + Returns: + Text: A string in the format "ModelTool(name=, function=, + supplier=, model=)". + """ supplier_str = self.supplier.value if self.supplier is not None else None model_str = self.model.id if self.model is not None else None return f"ModelTool(name={self.name}, function={self.function}, supplier={supplier_str}, model={model_str})" def deploy(self): + """Deploy the model tool. + + This is a placeholder method as model tools are managed through the aiXplain platform + and don't require explicit deployment. + """ pass diff --git a/aixplain/modules/agent/tool/pipeline_tool.py b/aixplain/modules/agent/tool/pipeline_tool.py index d6bd913e..4ed2bd8e 100644 --- a/aixplain/modules/agent/tool/pipeline_tool.py +++ b/aixplain/modules/agent/tool/pipeline_tool.py @@ -28,11 +28,18 @@ class PipelineTool(Tool): - """Specialized software or resource designed to assist the AI in executing specific tasks or functions based on user commands. + """A tool that wraps aiXplain pipelines to execute complex workflows based on user commands. + + This class provides an interface for using aiXplain pipelines as tools, allowing them + to be integrated into agent workflows. It handles pipeline validation, status management, + and execution. Attributes: - description (Text): description of the tool - pipeline (Union[Text, Pipeline]): pipeline + description (Text): A description of what the pipeline tool does. + pipeline (Union[Text, Pipeline]): The pipeline to execute, either as a Pipeline instance + or a pipeline ID string. + status (AssetStatus): The current status of the pipeline tool. + name (Text): The name of the tool, defaults to pipeline name if not provided. 
""" def __init__( @@ -42,11 +49,18 @@ def __init__( name: Optional[Text] = None, **additional_info, ) -> None: - """Specialized software or resource designed to assist the AI in executing specific tasks or functions based on user commands. + """Initialize a new PipelineTool instance. Args: - description (Text): description of the tool - pipeline (Union[Text, Pipeline]): pipeline + description (Text): A description of what the pipeline tool does. + pipeline (Union[Text, Pipeline]): The pipeline to execute, either as a Pipeline instance + or a pipeline ID string. + name (Optional[Text], optional): The name of the tool. If not provided, will use + the pipeline's name. Defaults to None. + **additional_info: Additional keyword arguments for tool configuration. + + Raises: + Exception: If the specified pipeline doesn't exist or is inaccessible. """ name = name or "" super().__init__(name=name, description=description, **additional_info) @@ -57,6 +71,16 @@ def __init__( self.validate() def to_dict(self): + """Convert the tool instance to a dictionary representation. + + Returns: + dict: A dictionary containing the tool's configuration with keys: + - assetId: The pipeline ID + - name: The tool's name + - description: The tool's description + - type: Always "pipeline" + - status: The tool's status + """ return { "assetId": self.pipeline, "name": self.name, @@ -66,9 +90,24 @@ def to_dict(self): } def __repr__(self) -> Text: + """Return a string representation of the tool. + + Returns: + Text: A string in the format "PipelineTool(name=, pipeline=)". + """ return f"PipelineTool(name={self.name}, pipeline={self.pipeline})" def validate(self): + """Validate the pipeline tool's configuration. + + This method performs several checks: + 1. Verifies the pipeline exists and is accessible + 2. Sets the tool name to the pipeline name if not provided + 3. Updates the tool status to match the pipeline status + + Raises: + Exception: If the pipeline doesn't exist or is inaccessible. 
+ """ from aixplain.factories.pipeline_factory import PipelineFactory if isinstance(self.pipeline, Pipeline): @@ -86,4 +125,9 @@ def validate(self): self.status = pipeline_obj.status def deploy(self): + """Deploy the pipeline tool. + + This is a placeholder method as pipeline tools are managed through the aiXplain platform + and don't require explicit deployment. + """ pass diff --git a/aixplain/modules/agent/tool/python_interpreter_tool.py b/aixplain/modules/agent/tool/python_interpreter_tool.py index 69212bf4..0b36abe5 100644 --- a/aixplain/modules/agent/tool/python_interpreter_tool.py +++ b/aixplain/modules/agent/tool/python_interpreter_tool.py @@ -28,16 +28,41 @@ class PythonInterpreterTool(Tool): - """Python Interpreter Tool""" + """A tool that provides a Python shell for executing Python commands. + + This tool allows direct execution of Python code within the aiXplain platform. + It acts as an interface to a Python interpreter, enabling dynamic code execution + and computation. + + Attributes: + name (Text): Always set to "Python Interpreter". + description (Text): Description of the tool's functionality. + status (AssetStatus): The current status of the tool (ONBOARDED or DRAFT). + """ def __init__(self, **additional_info) -> None: - """Python Interpreter Tool""" + """Initialize a new PythonInterpreterTool instance. + + This initializes a Python interpreter tool with a fixed name and description. + The tool is set to ONBOARDED status by default. + + Args: + **additional_info: Additional keyword arguments for tool configuration. + """ description = "A Python shell. Use this to execute python commands. Input should be a valid python command." super().__init__(name="Python Interpreter", description=description, **additional_info) self.status = AssetStatus.ONBOARDED # TODO: change to DRAFT when we have a way to onboard the tool def to_dict(self): + """Convert the tool instance to a dictionary representation. 
+ + Returns: + dict: A dictionary containing the tool's configuration with keys: + - description: The tool's description + - type: Always "utility" + - utility: Always "custom_python_code" + """ return { "description": self.description, "type": "utility", @@ -45,10 +70,25 @@ def to_dict(self): } def validate(self): + """Validate the tool's configuration. + + This is a placeholder method as the Python interpreter tool has a fixed + configuration that doesn't require validation. + """ pass def __repr__(self) -> Text: + """Return a string representation of the tool. + + Returns: + Text: A string in the format "PythonInterpreterTool()". + """ return "PythonInterpreterTool()" def deploy(self): + """Deploy the Python interpreter tool. + + This is a placeholder method as the Python interpreter tool is automatically + available and doesn't require explicit deployment. + """ pass diff --git a/aixplain/modules/agent/tool/sql_tool.py b/aixplain/modules/agent/tool/sql_tool.py index 4cc36c68..331f4539 100644 --- a/aixplain/modules/agent/tool/sql_tool.py +++ b/aixplain/modules/agent/tool/sql_tool.py @@ -50,7 +50,20 @@ class DatabaseError(SQLToolError): def clean_column_name(col: Text) -> Text: - """Clean column names by replacing spaces and special characters with underscores""" + """Clean column names by replacing spaces and special characters with underscores. + + This function makes column names SQLite-compatible by: + 1. Converting to lowercase + 2. Replacing special characters with underscores + 3. Removing duplicate underscores + 4. Adding 'col_' prefix to names starting with numbers + + Args: + col (Text): The original column name. + + Returns: + Text: The cleaned, SQLite-compatible column name. 
+ """ # Replace special characters with underscores cleaned = col.strip().lower() cleaned = "".join(c if c.isalnum() else "_" for c in cleaned) @@ -68,7 +81,17 @@ def clean_column_name(col: Text) -> Text: def check_duplicate_columns(df: pd.DataFrame) -> None: - """Check for duplicate column names in DataFrame and raise CSVError if found""" + """Check for duplicate column names in DataFrame after cleaning. + + This function checks if any column names would become duplicates after being + cleaned for SQLite compatibility. + + Args: + df (pd.DataFrame): The DataFrame to check for duplicate column names. + + Raises: + CSVError: If any cleaned column names would be duplicates. + """ # Get all column names columns = df.columns.tolist() # Get cleaned column names @@ -88,7 +111,24 @@ def check_duplicate_columns(df: pd.DataFrame) -> None: def infer_sqlite_type(dtype) -> Text: - """Infer SQLite type from pandas dtype""" + """Infer SQLite type from pandas dtype. + + This function maps pandas data types to appropriate SQLite types: + - Integer types -> INTEGER + - Float types -> REAL + - Boolean types -> INTEGER + - Datetime types -> TIMESTAMP + - All others -> TEXT + + Args: + dtype: The pandas dtype to convert. + + Returns: + Text: The corresponding SQLite type. + + Note: + Issues a warning when falling back to TEXT type. + """ if pd.api.types.is_integer_dtype(dtype): return "INTEGER" elif pd.api.types.is_float_dtype(dtype): @@ -103,7 +143,22 @@ def infer_sqlite_type(dtype) -> Text: def get_table_schema(database_path: str) -> str: - """Get the schema of all tables in the database""" + """Get the schema of all tables in the SQLite database. + + This function retrieves the CREATE TABLE statements for all tables in the database. + + Args: + database_path (str): Path to the SQLite database file. + + Returns: + str: A string containing all table schemas, separated by newlines. + + Raises: + DatabaseError: If the database file doesn't exist or there's an error accessing it. 
+ + Note: + Issues a warning if no tables are found in the database. + """ if not os.path.exists(database_path): raise DatabaseError(f"Database file '{database_path}' does not exist") @@ -128,7 +183,29 @@ def get_table_schema(database_path: str) -> str: def create_database_from_csv(csv_path: str, database_path: str, table_name: str = None) -> str: - """Create SQLite database from CSV file and return the schema""" + """Create SQLite database from CSV file and return the schema. + + This function creates or modifies a SQLite database by importing data from a CSV file. + It handles column name cleaning, data type inference, and data conversion. + + Args: + csv_path (str): Path to the CSV file to import. + database_path (str): Path where the SQLite database should be created/modified. + table_name (str, optional): Name for the table to create. If not provided, + uses the CSV filename (cleaned). Defaults to None. + + Returns: + str: The schema of the created database. + + Raises: + CSVError: If there are issues with the CSV file (doesn't exist, empty, parsing error). + DatabaseError: If there are issues with database creation or modification. + + Note: + - Issues warnings for column name changes and existing database/table modifications. + - Automatically cleans column names for SQLite compatibility. + - Handles NULL values, timestamps, and numeric data types appropriately. + """ if not os.path.exists(csv_path): raise CSVError(f"CSV file '{csv_path}' does not exist") if not csv_path.endswith(".csv"): @@ -232,7 +309,17 @@ def create_database_from_csv(csv_path: str, database_path: str, table_name: str def get_table_names_from_schema(schema: str) -> List[str]: - """Extract table names from schema string""" + """Extract table names from a database schema string. + + This function parses CREATE TABLE statements to extract table names. + + Args: + schema (str): The database schema string containing CREATE TABLE statements. 
+ + Returns: + List[str]: A list of table names found in the schema. Returns an empty list + if no tables are found or if the schema is empty. + """ if not schema: return [] @@ -247,14 +334,21 @@ def get_table_names_from_schema(schema: str) -> List[str]: class SQLTool(Tool): - """Tool to execute SQL commands in an SQLite database. + """A tool for executing SQL commands in an SQLite database. + + This tool provides an interface for interacting with SQLite databases, including + executing queries, managing schema, and handling table operations. It supports + both read-only and write operations based on configuration. Attributes: - description (Text): description of the tool - database (Text): database name - schema (Text): database schema description - tables (Optional[Union[List[Text], Text]]): table names to work with (optional) - enable_commit (bool): enable to modify the database (optional) + description (Text): A description of what the SQL tool does. + database (Text): The database URI or path. + schema (Text): The database schema containing table definitions. + tables (Optional[Union[List[Text], Text]]): List of table names that can be + accessed by this tool. If None, all tables are accessible. + enable_commit (bool): Whether write operations (INSERT, UPDATE, DELETE) are + allowed. If False, only read operations are permitted. + status (AssetStatus): The current status of the tool (DRAFT or ONBOARDED). """ def __init__( @@ -267,15 +361,25 @@ def __init__( enable_commit: bool = False, **additional_info, ) -> None: - """Tool to execute SQL query commands in an SQLite database. + """Initialize a new SQLTool instance. 
Args: - name (Text): name of the tool - description (Text): description of the tool - database (Text): database uri - schema (Optional[Text]): database schema description - tables (Optional[Union[List[Text], Text]]): table names to work with (optional) - enable_commit (bool): enable to modify the database (optional) + name (Text): The name of the tool. + description (Text): A description of what the SQL tool does. + database (Text): The database URI or path. Can be a local file path, + S3 URI, or HTTP(S) URL. + schema (Optional[Text], optional): The database schema containing table + definitions. If not provided, will be inferred from the database. + Defaults to None. + tables (Optional[Union[List[Text], Text]], optional): List of table names + that can be accessed by this tool. If not provided, all tables are + accessible. Defaults to None. + enable_commit (bool, optional): Whether write operations are allowed. + If False, only read operations are permitted. Defaults to False. + **additional_info: Additional keyword arguments for tool configuration. + + Raises: + SQLToolError: If required parameters are missing or invalid. """ super().__init__(name=name, description=description, **additional_info) @@ -288,6 +392,19 @@ def __init__( self.validate() def to_dict(self) -> Dict[str, Text]: + """Convert the tool instance to a dictionary representation. + + Returns: + Dict[str, Text]: A dictionary containing the tool's configuration with keys: + - name: The tool's name + - description: The tool's description + - parameters: List of parameter dictionaries containing: + - database: The database URI or path + - schema: The database schema + - tables: Comma-separated list of table names or None + - enable_commit: Whether write operations are allowed + - type: Always "sql" + """ return { "name": self.name, "description": self.description, @@ -301,6 +418,19 @@ def to_dict(self) -> Dict[str, Text]: } def validate(self): + """Validate the SQL tool's configuration. 
+ + This method performs several checks: + 1. Verifies required fields (description, database) are provided + 2. Validates database path/URI format + 3. Infers schema from database if not provided + 4. Sets table list from schema if not provided + 5. Uploads local database file to storage + + Raises: + SQLToolError: If any validation check fails or if there are issues with + database access or file operations. + """ from aixplain.factories.file_factory import FileFactory if not self.description or self.description.strip() == "": @@ -340,6 +470,18 @@ def validate(self): raise SQLToolError(f"Failed to upload database: {str(e)}") def deploy(self) -> None: + """Deploy the SQL tool by downloading and preparing the database. + + This method handles the deployment process: + 1. For HTTP(S) URLs: Downloads the database file + 2. Creates a unique local filename + 3. Uploads the database to the aiXplain platform + 4. Cleans up temporary files + + Raises: + requests.exceptions.RequestException: If downloading the database fails. + Exception: If there are issues with file operations or uploads. + """ import uuid import requests from pathlib import Path diff --git a/aixplain/modules/agent/utils.py b/aixplain/modules/agent/utils.py index 684c82db..565f3054 100644 --- a/aixplain/modules/agent/utils.py +++ b/aixplain/modules/agent/utils.py @@ -5,6 +5,24 @@ def process_variables( query: Union[Text, Dict], data: Union[Dict, Text], parameters: Dict, agent_description: Union[Text, None] ) -> Text: + """Process variables in an agent's description and input data. + + This function validates and processes variables in an agent's description and + input data, ensuring that all required variables are present and properly + formatted. + + Args: + query (Union[Text, Dict]): The input data provided to the agent. + data (Union[Dict, Text]): The data to be processed. + parameters (Dict): The parameters available to the agent. 
+ agent_description (Union[Text, None]): The description of the agent. + + Returns: + Text: The processed input data with all required variables included. + + Raises: + AssertionError: If a required variable is not found in the data or parameters. + """ from aixplain.factories.file_factory import FileFactory if isinstance(query, dict): diff --git a/aixplain/modules/api_key.py b/aixplain/modules/api_key.py index 21a2e0c6..03ae156f 100644 --- a/aixplain/modules/api_key.py +++ b/aixplain/modules/api_key.py @@ -7,6 +7,19 @@ class APIKeyLimits: + """Rate limits configuration for an API key. + + This class defines the rate limits that can be applied either globally + to an API key or specifically to a model. + + Attributes: + token_per_minute (int): Maximum number of tokens allowed per minute. + token_per_day (int): Maximum number of tokens allowed per day. + request_per_minute (int): Maximum number of requests allowed per minute. + request_per_day (int): Maximum number of requests allowed per day. + model (Optional[Model]): The model these limits apply to, if any. + """ + def __init__( self, token_per_minute: int, @@ -15,6 +28,16 @@ def __init__( request_per_day: int, model: Optional[Union[Text, Model]] = None, ): + """Initialize an APIKeyLimits instance. + + Args: + token_per_minute (int): Maximum number of tokens per minute. + token_per_day (int): Maximum number of tokens per day. + request_per_minute (int): Maximum number of requests per minute. + request_per_day (int): Maximum number of requests per day. + model (Optional[Union[Text, Model]], optional): The model to apply + limits to. Can be a model ID or Model instance. Defaults to None. + """ self.token_per_minute = token_per_minute self.token_per_day = token_per_day self.request_per_minute = request_per_minute @@ -55,6 +78,23 @@ def __init__( class APIKey: + """An API key for accessing aiXplain services. + + This class represents an API key with its associated limits, budget, + and access controls. 
It can have both global rate limits and + model-specific rate limits. + + Attributes: + id (int): The ID of this API key. + name (Text): A descriptive name for the API key. + budget (Optional[float]): Maximum spending limit, if any. + global_limits (Optional[APIKeyLimits]): Rate limits applied globally. + asset_limits (List[APIKeyLimits]): Rate limits for specific models. + expires_at (Optional[datetime]): Expiration date and time. + access_key (Optional[Text]): The actual API key value. + is_admin (bool): Whether this is an admin API key. + """ + def __init__( self, name: Text, @@ -66,6 +106,32 @@ def __init__( access_key: Optional[Text] = None, is_admin: bool = False, ): + """Initialize an APIKey instance. + + Args: + name (Text): A descriptive name for the API key. + expires_at (Optional[Union[datetime, Text]], optional): When the key + expires. Can be a datetime or ISO format string. Defaults to None. + budget (Optional[float], optional): Maximum spending limit. + Defaults to None. + asset_limits (List[APIKeyLimits], optional): Rate limits for specific + models. Defaults to empty list. + global_limits (Optional[Union[Dict, APIKeyLimits]], optional): Global + rate limits. Can be a dict with tpm/tpd/rpm/rpd keys or an + APIKeyLimits instance. Defaults to None. + id (int, optional): Unique identifier. Defaults to empty string. + access_key (Optional[Text], optional): The actual API key value. + Defaults to None. + is_admin (bool, optional): Whether this is an admin key. + Defaults to False. + + Note: + The global_limits dict format should have these keys: + - tpm: tokens per minute + - tpd: tokens per day + - rpm: requests per minute + - rpd: requests per day + """ self.id = id self.name = name self.budget = budget @@ -93,7 +159,23 @@ def __init__( self.validate() def validate(self) -> None: - """Validate the APIKey object""" + """Validate the APIKey configuration. 
+ + This method checks that all rate limits are non-negative and that + referenced models exist and are valid. + + Raises: + AssertionError: If any of these conditions are not met: + - Budget is negative + - Global rate limits are negative + - Asset-specific rate limits are negative + Exception: If a referenced model ID is not a valid aiXplain model. + + Note: + - For asset limits, both the model reference and limits are checked + - Models can be specified by ID or Model instance + - Model IDs are resolved to Model instances during validation + """ from aixplain.factories import ModelFactory if self.budget is not None: @@ -117,7 +199,29 @@ def validate(self) -> None: raise Exception(f"Asset {asset_limit.model} is not a valid aiXplain model.") def to_dict(self) -> Dict: - """Convert the APIKey object to a dictionary""" + """Convert the APIKey instance to a dictionary representation. + + This method serializes the APIKey and its associated limits into a + format suitable for API requests or storage. + + Returns: + Dict: A dictionary containing: + - id (int): The API key's ID + - name (Text): The API key's name + - budget (Optional[float]): The spending limit + - assetsLimits (List[Dict]): Model-specific limits with: + - tpm: tokens per minute + - tpd: tokens per day + - rpm: requests per minute + - rpd: requests per day + - assetId: model ID + - expiresAt (Optional[Text]): ISO format expiration date + - globalLimits (Optional[Dict]): Global limits with tpm/tpd/rpm/rpd + + Note: + - Datetime objects are converted to ISO format strings + - Model instances are referenced by their ID + """ payload = { "id": self.id, "name": self.name, @@ -150,7 +254,23 @@ def to_dict(self) -> Dict: return payload def delete(self) -> None: - """Delete an API key by its ID""" + """Delete this API key from the system. + + This method permanently removes the API key from the aiXplain platform. + The operation cannot be undone. 
+ + Raises: + Exception: If deletion fails, which can happen if: + - The API key doesn't exist + - The user doesn't have permission to delete it + - The API request fails + - The server returns a non-200 status code + + Note: + - This operation is permanent and cannot be undone + - Only the API key owner can delete it + - Uses the team API key for authentication + """ try: url = f"{config.BACKEND_URL}/sdk/api-keys/{self.id}" headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} @@ -164,7 +284,35 @@ def delete(self) -> None: raise Exception(f"{message}") def get_usage(self, asset_id: Optional[Text] = None) -> APIKeyUsageLimit: - """Get the usage limits of an API key""" + """Get current usage statistics for this API key. + + This method retrieves the current usage counts and limits for the API key, + either globally or for a specific model. + + Args: + asset_id (Optional[Text], optional): The model ID to get usage for. + If None, returns usage for all models. Defaults to None. 
+ + Returns: + APIKeyUsageLimit: A list of usage statistics objects containing: + - daily_request_count: Number of requests made today + - daily_request_limit: Maximum requests allowed per day + - daily_token_count: Number of tokens used today + - daily_token_limit: Maximum tokens allowed per day + - model: The model ID these stats apply to (None for global) + + Raises: + Exception: If the request fails, which can happen if: + - The API key doesn't exist + - The user doesn't have permission to view usage + - The API request fails + - The server returns an error response + + Note: + - Uses the team API key for authentication + - Counts reset at the start of each day + - Filtered by asset_id if provided + """ try: url = f"{config.BACKEND_URL}/sdk/api-keys/{self.id}/usage-limits" headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} @@ -192,7 +340,26 @@ def get_usage(self, asset_id: Optional[Text] = None) -> APIKeyUsageLimit: raise Exception(f"API Key Usage Error: Failed to get usage. Error: {str(resp)}") def __set_limit(self, limit: int, model: Optional[Union[Text, Model]], limit_type: Text) -> None: - """Set a limit for an API key""" + """Internal method to set a rate limit value. + + This method updates either a global limit or a model-specific limit + with the provided value. + + Args: + limit (int): The new limit value to set. + model (Optional[Union[Text, Model]]): The model to set limit for. + If None, sets a global limit. + limit_type (Text): The type of limit to set (e.g., "token_per_day"). + + Raises: + Exception: If trying to set a limit for a model that isn't + configured in this API key's asset_limits. 
+ + Note: + - Model can be specified by ID or Model instance + - For global limits, model should be None + - limit_type must match an attribute name in APIKeyLimits + """ if model is None: setattr(self.global_limits, limit_type, limit) else: @@ -208,17 +375,77 @@ def __set_limit(self, limit: int, model: Optional[Union[Text, Model]], limit_typ raise Exception(f"Limit for Model {model} not found in the API key.") def set_token_per_day(self, token_per_day: int, model: Optional[Union[Text, Model]] = None) -> None: - """Set the token per day limit of an API key""" + """Set the daily token limit for this API key. + + Args: + token_per_day (int): Maximum number of tokens allowed per day. + model (Optional[Union[Text, Model]], optional): The model to set + limit for. If None, sets global limit. Defaults to None. + + Raises: + Exception: If the model isn't configured in this API key's + asset_limits. + + Note: + - Model can be specified by ID or Model instance + - For global limits, model should be None + - The new limit takes effect immediately + """ self.__set_limit(token_per_day, model, "token_per_day") def set_token_per_minute(self, token_per_minute: int, model: Optional[Union[Text, Model]] = None) -> None: - """Set the token per minute limit of an API key""" + """Set the per-minute token limit for this API key. + + Args: + token_per_minute (int): Maximum number of tokens allowed per minute. + model (Optional[Union[Text, Model]], optional): The model to set + limit for. If None, sets global limit. Defaults to None. + + Raises: + Exception: If the model isn't configured in this API key's + asset_limits. 
+ + Note: + - Model can be specified by ID or Model instance + - For global limits, model should be None + - The new limit takes effect immediately + """ self.__set_limit(token_per_minute, model, "token_per_minute") def set_request_per_day(self, request_per_day: int, model: Optional[Union[Text, Model]] = None) -> None: - """Set the request per day limit of an API key""" + """Set the daily request limit for this API key. + + Args: + request_per_day (int): Maximum number of requests allowed per day. + model (Optional[Union[Text, Model]], optional): The model to set + limit for. If None, sets global limit. Defaults to None. + + Raises: + Exception: If the model isn't configured in this API key's + asset_limits. + + Note: + - Model can be specified by ID or Model instance + - For global limits, model should be None + - The new limit takes effect immediately + """ self.__set_limit(request_per_day, model, "request_per_day") def set_request_per_minute(self, request_per_minute: int, model: Optional[Union[Text, Model]] = None) -> None: - """Set the request per minute limit of an API key""" + """Set the per-minute request limit for this API key. + + Args: + request_per_minute (int): Maximum number of requests allowed per minute. + model (Optional[Union[Text, Model]], optional): The model to set + limit for. If None, sets global limit. Defaults to None. + + Raises: + Exception: If the model isn't configured in this API key's + asset_limits. + + Note: + - Model can be specified by ID or Model instance + - For global limits, model should be None + - The new limit takes effect immediately + """ self.__set_limit(request_per_minute, model, "request_per_minute") diff --git a/aixplain/modules/asset.py b/aixplain/modules/asset.py index b4323cf5..d5b3b9fb 100644 --- a/aixplain/modules/asset.py +++ b/aixplain/modules/asset.py @@ -27,6 +27,23 @@ class Asset: + """A class representing an aiXplain Asset. 
+ + This class provides functionality to create and manage assets in the aiXplain platform. + Assets can be models, datasets, or other resources with associated metadata like + supplier information, version, license, privacy settings, and cost. + + Attributes: + id (Text): The unique identifier of the asset. + name (Text): The name of the asset. + description (Text): A detailed description of the asset. + supplier (Union[Dict, Text, Supplier, int]): The supplier of the asset. + version (Text): The version of the asset. + license (Optional[License]): The license associated with the asset. + privacy (Privacy): The privacy setting of the asset. + cost (Optional[Union[Dict, float]]): The cost associated with the asset. + """ + def __init__( self, id: Text, @@ -38,15 +55,19 @@ def __init__( privacy: Privacy = Privacy.PRIVATE, cost: Optional[Union[Dict, float]] = None, ) -> None: - """Create an Asset with the necessary information + """Initialize a new Asset instance. Args: - id (Text): ID of the Asset - name (Text): Name of the Asset - description (Text): Description of the Asset - supplier (Union[Dict, Text, Supplier, int], optional): supplier of the asset. Defaults to "aiXplain". - version (Optional[Text], optional): asset version. Defaults to "1.0". - cost (Optional[Union[Dict, float]], optional): asset price. Defaults to None. + id (Text): Unique identifier of the asset. + name (Text): Name of the asset. + description (Text): Detailed description of the asset. + supplier (Union[Dict, Text, Supplier, int], optional): Supplier of the asset. + Can be a Supplier enum, dictionary, text, or integer. Defaults to Supplier.AIXPLAIN. + version (Text, optional): Version of the asset. Defaults to "1.0". + license (Optional[License], optional): License associated with the asset. Defaults to None. + privacy (Privacy, optional): Privacy setting of the asset. Defaults to Privacy.PRIVATE. + cost (Optional[Union[Dict, float]], optional): Cost of the asset. 
Can be a dictionary + with pricing details or a float value. Defaults to None. """ self.id = id self.name = name @@ -72,9 +93,13 @@ def __init__( self.cost = cost def to_dict(self) -> dict: - """Get the asset info as a Dictionary + """Convert the Asset instance to a dictionary representation. + + This method serializes all attributes of the Asset instance into a dictionary + format, which can be useful for data transmission or storage. Returns: - dict: Asset Information + dict: A dictionary containing all attributes of the Asset instance. + Keys are attribute names and values are their corresponding values. """ return self.__dict__ diff --git a/aixplain/modules/benchmark.py b/aixplain/modules/benchmark.py index 6878becf..83ca5098 100644 --- a/aixplain/modules/benchmark.py +++ b/aixplain/modules/benchmark.py @@ -61,13 +61,14 @@ def __init__( Args: id (Text): ID of the Benchmark. name (Text): Name of the Benchmark. - model_list (List[Model]): List of Models to be used for benchmarking - dataset_list (List[Dataset]): List of Datasets to be used for benchmarking - metric_list (List[Metric]): List of Metrics to be used for benchmarking - job_list (List[BenchmarkJob]): List of associated Benchmark Jobs - supplier (Text, optional): author of the Benchmark. Defaults to "aiXplain". + dataset_list (List[Dataset]): List of Datasets to be used for benchmarking. + model_list (List[Model]): List of Models to be used for benchmarking. + metric_list (List[Metric]): List of Metrics to be used for benchmarking. + job_list (List[BenchmarkJob]): List of associated Benchmark Jobs. + description (Text, optional): Description of the Benchmark. Defaults to "". + supplier (Text, optional): Author of the Benchmark. Defaults to "aiXplain". version (Text, optional): Benchmark version. Defaults to "1.0". - **additional_info: Any additional Benchmark info to be saved + **additional_info: Any additional Benchmark info to be saved. 
""" super().__init__(id, name, description, supplier, version) self.model_list = model_list @@ -80,13 +81,27 @@ def __init__( self.aixplain_key = config.AIXPLAIN_API_KEY def __repr__(self) -> str: + """Return a string representation of the Benchmark instance. + + Returns: + str: A string in the format "". + """ return f"" def start(self) -> BenchmarkJob: - """Starts a new benchmark job(run) for the current benchmark + """Start a new benchmark job (run) for the current benchmark. + + This method initiates a new benchmark job using the configured models, + datasets, and metrics. It communicates with the backend API to create + and start the job. Returns: - BenchmarkJob: Benchmark Job that just got started + BenchmarkJob: A new BenchmarkJob instance representing the started job. + Returns None if the job creation fails. + + Raises: + Exception: If there's an error creating or starting the benchmark job. + The error is logged and None is returned. """ benhchmark_id = None try: diff --git a/aixplain/modules/benchmark_job.py b/aixplain/modules/benchmark_job.py index 4ab0865b..83fa7ac0 100644 --- a/aixplain/modules/benchmark_job.py +++ b/aixplain/modules/benchmark_job.py @@ -1,5 +1,5 @@ import logging -from typing import Text, Dict, Optional +from typing import Text, Dict, Optional, Union from aixplain.utils import config from urllib.parse import urljoin import pandas as pd @@ -36,10 +36,30 @@ def __init__(self, id: Text, status: Text, benchmark_id: Text, **additional_info @classmethod def _create_benchmark_job_from_response(cls, response: Dict): + """Create a BenchmarkJob instance from an API response. + + Args: + response (Dict): The API response containing benchmark job information. + Must contain 'jobId', 'status', and 'benchmark.id' fields. + + Returns: + BenchmarkJob: A new BenchmarkJob instance initialized with the response data. 
+ """ return BenchmarkJob(response["jobId"], response["status"], response["benchmark"]["id"]) @classmethod def _fetch_current_response(cls, job_id: Text) -> dict: + """Fetch the current state of a benchmark job from the API. + + Args: + job_id (Text): The ID of the benchmark job to fetch. + + Returns: + dict: The API response containing the current state of the benchmark job. + + Raises: + Exception: If the API request fails. + """ url = urljoin(config.BACKEND_URL, f"sdk/benchmarks/jobs/{job_id}") headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} r = _request_with_retry("get", url, headers=headers) @@ -47,12 +67,25 @@ def _fetch_current_response(cls, job_id: Text) -> dict: return resp def _update_from_response(self, response: dict): + """Update the benchmark job's state from an API response. + + Args: + response (dict): The API response containing updated benchmark job information. + Must contain a 'status' field. + """ self.status = response["status"] def __repr__(self) -> str: return f"" - def check_status(self): + def check_status(self) -> Text: + """Check the current status of the benchmark job. + + Fetches the latest status from the API and updates the local state. + + Returns: + Text: The current status of the benchmark job. + """ response = self._fetch_current_response(self.id) self._update_from_response(response) return self.status @@ -92,7 +125,17 @@ def download_results_as_csv(self, save_path: Optional[Text] = None, return_dataf logging.error(error_message, exc_info=True) raise Exception(error_message) - def __simplify_scores(self, scores): + def __simplify_scores(self, scores: Dict) -> list: + """Simplify the raw scores into a more readable format. + + Args: + scores (Dict): Raw scores dictionary containing model IDs as keys and + score information as values. + + Returns: + list: A list of dictionaries, each containing a model's scores in a simplified format. 
+ Each dictionary has 'Model' as a key and metric names as additional keys. + """ simplified_score_list = [] for model_id, model_info in scores.items(): model_scores = model_info["rawScores"] @@ -103,7 +146,24 @@ def __simplify_scores(self, scores): simplified_score_list.append(row) return simplified_score_list - def get_scores(self, return_simplified=True, return_as_dataframe=True): + def get_scores(self, return_simplified: bool = True, return_as_dataframe: bool = True) -> Union[Dict, pd.DataFrame, list]: + """Get the benchmark scores for all models. + + Args: + return_simplified (bool, optional): If True, returns a simplified version of scores. + Defaults to True. + return_as_dataframe (bool, optional): If True and return_simplified is True, + returns results as a pandas DataFrame. Defaults to True. + + Returns: + Union[Dict, pd.DataFrame, list]: The benchmark scores in the requested format. + - If return_simplified=False: Returns a dictionary with detailed model scores + - If return_simplified=True and return_as_dataframe=True: Returns a pandas DataFrame + - If return_simplified=True and return_as_dataframe=False: Returns a list of dictionaries + + Raises: + Exception: If there's an error fetching or processing the scores. + """ try: resp = self._fetch_current_response(self.id) iterations = resp.get("iterations", []) @@ -134,7 +194,21 @@ def get_scores(self, return_simplified=True, return_as_dataframe=True): logging.error(error_message, exc_info=True) raise Exception(error_message) - def get_failuire_rate(self, return_as_dataframe=True): + def get_failuire_rate(self, return_as_dataframe: bool = True) -> Union[Dict, pd.DataFrame]: + """Calculate the failure rate for each model in the benchmark. + + Args: + return_as_dataframe (bool, optional): If True, returns results as a pandas DataFrame. + Defaults to True. + + Returns: + Union[Dict, pd.DataFrame]: The failure rates for each model. 
+ - If return_as_dataframe=True: Returns a DataFrame with 'Model' and 'Failure Rate' columns + - If return_as_dataframe=False: Returns a dictionary with model IDs as keys and failure rates as values + + Raises: + Exception: If there's an error calculating the failure rates. + """ try: scores = self.get_scores(return_simplified=False) failure_rates = {} @@ -159,7 +233,18 @@ def get_failuire_rate(self, return_as_dataframe=True): logging.error(error_message, exc_info=True) raise Exception(error_message) - def get_all_explanations(self): + def get_all_explanations(self) -> Dict: + """Get all explanations for the benchmark results. + + Returns: + Dict: A dictionary containing both metric-dependent and metric-independent explanations. + The dictionary has two keys: + - 'metricInDependent': List of metric-independent explanations + - 'metricDependent': List of metric-dependent explanations + + Raises: + Exception: If there's an error fetching the explanations. + """ try: resp = self._fetch_current_response(self) raw_explanations = resp.get("explanation", {}) @@ -173,7 +258,25 @@ def get_all_explanations(self): logging.error(error_message, exc_info=True) raise Exception(error_message) - def get_localized_explanations(self, metric_dependant: bool, group_by_task: bool = False): + def get_localized_explanations(self, metric_dependant: bool, group_by_task: bool = False) -> Dict: + """Get localized explanations for the benchmark results. + + Args: + metric_dependant (bool): If True, returns metric-dependent explanations. + If False, returns metric-independent explanations. + group_by_task (bool, optional): If True and metric_dependant is True, + groups explanations by task. Defaults to False. + + Returns: + Dict: A dictionary containing the localized explanations. 
+ The structure depends on the input parameters: + - If metric_dependant=False: Returns metric-independent explanations + - If metric_dependant=True and group_by_task=False: Returns explanations grouped by score ID + - If metric_dependant=True and group_by_task=True: Returns explanations grouped by task + + Raises: + Exception: If there's an error fetching or processing the explanations. + """ try: raw_explanations = self.get_all_explanations() if metric_dependant: diff --git a/aixplain/modules/content_interval.py b/aixplain/modules/content_interval.py index e8283eb8..ce6f55ce 100644 --- a/aixplain/modules/content_interval.py +++ b/aixplain/modules/content_interval.py @@ -27,24 +27,76 @@ @dataclass class ContentInterval: + """Base class for representing intervals or segments within content. + + This class serves as the base for more specific content interval types + like text, audio, image, and video intervals. + + Attributes: + content (Text): The actual content within the interval. + content_id (int): ID of the content interval. + """ content: Text content_id: int @dataclass class TextContentInterval(ContentInterval): + """Class representing an interval or segment within text content. + + This class extends ContentInterval to handle text-specific intervals, + supporting both character-based and line-column-based positions. + + Attributes: + content (Text): The text content within the interval. + content_id (int): ID of the content interval. + start (Union[int, Tuple[int, int]]): Starting position of the interval. + Can be either a character offset (int) or a line-column tuple (int, int). + end (Union[int, Tuple[int, int]]): Ending position of the interval. + Can be either a character offset (int) or a line-column tuple (int, int). + """ start: Union[int, Tuple[int, int]] end: Union[int, Tuple[int, int]] @dataclass class AudioContentInterval(ContentInterval): + """Class representing an interval or segment within audio content. 
+ + This class extends ContentInterval to handle audio-specific intervals + using timestamps. + + Attributes: + content (Text): The audio content within the interval. + content_id (int): ID of the content interval. + start_time (float): Starting timestamp of the interval in seconds. + end_time (float): Ending timestamp of the interval in seconds. + """ start_time: float end_time: float @dataclass class ImageContentInterval(ContentInterval): + """Class representing an interval or region within image content. + + This class extends ContentInterval to handle image-specific regions, + supporting both single points and polygons through coordinates. + + Attributes: + content (Text): The image content within the interval. + content_id (int): ID of the content interval. + x (Union[float, List[float]]): X-coordinate(s) of the region. + Single float for rectangular regions, list for polygon vertices. + y (Union[float, List[float]]): Y-coordinate(s) of the region. + Single float for rectangular regions, list for polygon vertices. + width (Optional[float]): Width of the region in pixels. Only used for + rectangular regions. Defaults to None. + height (Optional[float]): Height of the region in pixels. Only used for + rectangular regions. Defaults to None. + rotation (Optional[float]): Rotation angle of the region in degrees. + Defaults to None. + """ x: Union[float, List[float]] y: Union[float, List[float]] width: Optional[float] = None @@ -54,6 +106,29 @@ class ImageContentInterval(ContentInterval): @dataclass class VideoContentInterval(ContentInterval): + """Class representing an interval or region within video content. + + This class extends ContentInterval to handle video-specific intervals, + combining temporal information with optional spatial regions. + + Attributes: + content (Text): The video content within the interval. + content_id (int): ID of the content interval. + start_time (float): Starting timestamp of the interval in seconds. 
+ end_time (float): Ending timestamp of the interval in seconds. + x (Optional[Union[float, List[float]]], optional): X-coordinate(s) of the region. + Single float for rectangular regions, list for polygon vertices. + Defaults to None. + y (Optional[Union[float, List[float]]], optional): Y-coordinate(s) of the region. + Single float for rectangular regions, list for polygon vertices. + Defaults to None. + width (Optional[float], optional): Width of the region in pixels. + Only used for rectangular regions. Defaults to None. + height (Optional[float], optional): Height of the region in pixels. + Only used for rectangular regions. Defaults to None. + rotation (Optional[float], optional): Rotation angle of the region in degrees. + Defaults to None. + """ start_time: float end_time: float x: Optional[Union[float, List[float]]] = None diff --git a/aixplain/modules/corpus.py b/aixplain/modules/corpus.py index 10101292..b34d2a2f 100644 --- a/aixplain/modules/corpus.py +++ b/aixplain/modules/corpus.py @@ -35,6 +35,28 @@ class Corpus(Asset): + """A class representing a general-purpose collection of data in the aiXplain platform. + + This class extends Asset to provide functionality for managing corpora, which are + collections of data that can be processed and used to create task-specific datasets. + A corpus can contain various types of data and is used as a foundation for creating + specialized datasets. + + Attributes: + id (Text): ID of the corpus. + name (Text): Name of the corpus. + description (Text): Detailed description of the corpus. + data (List[Data]): List of data objects that make up the corpus. + onboard_status (OnboardStatus): Current onboarding status of the corpus. + functions (List[Function]): AI functions the corpus is suitable for. + tags (List[Text]): Descriptive tags for the corpus. + license (Optional[License]): License associated with the corpus. + privacy (Privacy): Privacy settings for the corpus. 
+ supplier (Text): The supplier/author of the corpus. + version (Text): Version of the corpus. + length (Optional[int]): Number of rows/items in the corpus. + """ + def __init__( self, id: Text, @@ -82,11 +104,29 @@ def __init__( self.length = length self.kwargs = kwargs - def __repr__(self): + def __repr__(self) -> str: + """Return a string representation of the Corpus instance. + + Returns: + str: A string in the format "<Corpus: {self.name}>". + """ return f"<Corpus: {self.name}>" def delete(self) -> None: - """Delete Corpus service""" + """Delete this corpus from the aiXplain platform. + + This method permanently removes the corpus from the platform. The operation + can only be performed by the corpus owner. + + Returns: + None + + Raises: + Exception: If the deletion fails, either because: + - The corpus doesn't exist + - The user is not the owner + - There's a network/server error + """ try: url = urljoin(config.BACKEND_URL, f"sdk/corpora/{self.id}") headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} diff --git a/aixplain/modules/data.py b/aixplain/modules/data.py index 27658d68..a51fe6e5 100644 --- a/aixplain/modules/data.py +++ b/aixplain/modules/data.py @@ -31,6 +31,28 @@ class Data: + """A class representing a collection of data samples of the same type and genre. + + This class provides functionality for managing data in the aiXplain platform, + supporting various data types, languages, and storage formats. It can handle + both structured (e.g., CSV) and unstructured data files. + + Attributes: + id (Text): ID of the data collection. + name (Text): Name of the data collection. + dtype (DataType): Type of data (e.g., text, audio, image). + privacy (Privacy): Privacy settings for the data. + onboard_status (OnboardStatus): Current onboarding status. + data_column (Optional[Any]): Column identifier where data is stored in structured files. + start_column (Optional[Any]): Column identifier for start indexes in structured files. 
+ end_column (Optional[Any]): Column identifier for end indexes in structured files. + files (List[File]): List of files containing the data instances. + languages (List[Language]): List of languages present in the data. + dsubtype (DataSubtype): Subtype categorization of the data. + length (Optional[int]): Number of samples/rows in the data collection. + kwargs (dict): Additional keyword arguments for extensibility. + """ + def __init__( self, id: Text, @@ -47,24 +69,29 @@ def __init__( length: Optional[int] = None, **kwargs ) -> None: - """Data Class. - - Description: - Data consists of a list of samples of same type and genre. + """Initialize a new Data instance. Args: - id (Text): Data ID - name (Text): Data Name - dtype (DataType): Data Type - privacy (Privacy): Privacy of data - onboard_status (OnboardStatus): onboard status - data_column (Optional[Any], optional): Column index/name where the data is on a structured file (e.g. CSV). Defaults to None. - start_column (Optional[Any], optional): Column index/name where the start indexes is on a structured file (e.g. CSV). Defaults to None. - end_column (Optional[Any], optional): Column index/name where the end indexes is on a structured file (e.g. CSV). Defaults to None. - files (List[File], optional): List of files where the data instances are stored. Defaults to []. - languages (List[Language], optional): List of languages which the data consists of. Defaults to []. - dsubtype (DataSubtype, optional): Data subtype (e.g., age, topic, race, split, etc.), used in datasets metadata. Defaults to Other. - length (Optional[int], optional): Number of rows in the Data. Defaults to None. + id (Text): ID of the data collection. + name (Text): Name of the data collection. + dtype (DataType): Type of data (e.g., text, audio, image). + privacy (Privacy): Privacy settings for the data. + onboard_status (OnboardStatus): Current onboarding status of the data. 
+ data_column (Optional[Any], optional): Column identifier where data is stored in + structured files (e.g., CSV). If None, defaults to the value of name. + start_column (Optional[Any], optional): Column identifier where start indexes are + stored in structured files. Defaults to None. + end_column (Optional[Any], optional): Column identifier where end indexes are + stored in structured files. Defaults to None. + files (List[File], optional): List of files containing the data instances. + Defaults to empty list. + languages (List[Language], optional): List of languages present in the data. + Can be provided as Language enums or language codes. Defaults to empty list. + dsubtype (DataSubtype, optional): Subtype categorization of the data + (e.g., age, topic, race, split). Defaults to DataSubtype.OTHER. + length (Optional[int], optional): Number of samples/rows in the data collection. + Defaults to None. + **kwargs: Additional keyword arguments for extensibility. """ self.id = id self.name = name diff --git a/aixplain/modules/dataset.py b/aixplain/modules/dataset.py index 85264013..f05919dd 100644 --- a/aixplain/modules/dataset.py +++ b/aixplain/modules/dataset.py @@ -117,11 +117,29 @@ def __init__( self.length = length self.kwargs = kwargs - def __repr__(self): + def __repr__(self) -> str: + """Return a string representation of the Dataset instance. + + Returns: + str: A string in the format "<Dataset: {self.name}>". + """ return f"<Dataset: {self.name}>" def delete(self) -> None: - """Delete Dataset service""" + """Delete this dataset from the aiXplain platform. + + This method permanently removes the dataset from the platform. The operation + can only be performed by the dataset owner. 
+ + Returns: + None + + Raises: + Exception: If the deletion fails, either because: + - The dataset doesn't exist + - The user is not the owner + - There's a network/server error + """ try: url = urljoin(config.BACKEND_URL, f"sdk/datasets/{self.id}") headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} diff --git a/aixplain/modules/file.py b/aixplain/modules/file.py index 8be38856..5a9449cd 100644 --- a/aixplain/modules/file.py +++ b/aixplain/modules/file.py @@ -29,6 +29,18 @@ class File: + """A class representing a file in the aiXplain platform. + + This class provides functionality for managing files, which are used to store + data samples in the platform. It supports various file types, compression formats, + and data splits. + + Attributes: + path (Union[Text, pathlib.Path]): File path + extension (Union[Text, FileType]): File extension (e.g. CSV, TXT, etc.) + data_split (Optional[DataSplit]): Data split of the file. + compression (Optional[Text]): Compression extension (e.g., .gz). + """ def __init__( self, path: Union[Text, pathlib.Path], @@ -36,10 +48,7 @@ def __init__( data_split: Optional[DataSplit] = None, compression: Optional[Text] = None, ) -> None: - """File Class - - Description: - File where samples of a data is stored in + """Initialize a new File instance. Args: path (Union[Text, pathlib.Path]): File path diff --git a/aixplain/modules/finetune/__init__.py b/aixplain/modules/finetune/__init__.py index 15cc37a7..bb954021 100644 --- a/aixplain/modules/finetune/__init__.py +++ b/aixplain/modules/finetune/__init__.py @@ -35,24 +35,30 @@ class Finetune(Asset): - """FineTune is a powerful tool for fine-tuning machine learning models and using your own datasets for specific tasks. + """A tool for fine-tuning machine learning models using custom datasets. + + This class provides functionality to customize pre-trained models for specific tasks + by fine-tuning them on user-provided datasets. 
It handles the configuration of + training parameters, data splitting, and job execution. Attributes: - name (Text): Name of the FineTune. - dataset_list (List[Dataset]): List of Datasets to be used for fine-tuning. - model (Model): Model to be fine-tuned. - cost (Cost): Cost of the FineTune. - id (Text): ID of the FineTune. - description (Text): Description of the FineTune. - supplier (Text): Supplier of the FineTune. - version (Text): Version of the FineTune. - train_percentage (float): Percentage of training samples. - dev_percentage (float): Percentage of development samples. - prompt_template (Text): Fine-tuning prompt_template. - hyperparameters (Hyperparameters): Hyperparameters for fine-tuning. - additional_info (dict): Additional information to be saved with the FineTune. - backend_url (str): URL of the backend. - api_key (str): The TEAM API key used for authentication. + name (Text): Name of the fine-tuning job. + dataset_list (List[Dataset]): List of datasets to use for fine-tuning. + model (Model): The base model to be fine-tuned. + cost (FinetuneCost): Cost information for the fine-tuning job. + id (Text): ID of the fine-tuning job. + description (Text): Detailed description of the fine-tuning purpose. + supplier (Text): Provider/creator of the fine-tuned model. + version (Text): Version identifier of the fine-tuning job. + train_percentage (float): Percentage of data to use for training. + dev_percentage (float): Percentage of data to use for validation. + prompt_template (Text): Template for formatting training examples, using + <> to reference dataset columns. + hyperparameters (Hyperparameters): Configuration for the fine-tuning process. + additional_info (dict): Extra metadata for the fine-tuning job. + backend_url (str): URL endpoint for the backend API. + api_key (str): Authentication key for API access. + aixplain_key (str): aiXplain-specific API key. 
""" def __init__( @@ -71,22 +77,29 @@ def __init__( hyperparameters: Optional[Hyperparameters] = None, **additional_info, ) -> None: - """Create a FineTune with the necessary information. + """Initialize a new Finetune instance. Args: - name (Text): Name of the FineTune. - dataset_list (List[Dataset]): List of Datasets to be used for fine-tuning. - model (Model): Model to be fine-tuned. - cost (Cost): Cost of the FineTune. - id (Text, optional): ID of the FineTune. Defaults to "". - description (Text, optional): Description of the FineTune. Defaults to "". - supplier (Text, optional): Supplier of the FineTune. Defaults to "aiXplain". - version (Text, optional): Version of the FineTune. Defaults to "1.0". - train_percentage (float, optional): Percentage of training samples. Defaults to 100. - dev_percentage (float, optional): Percentage of development samples. Defaults to 0. - prompt_template (Text, optional): Fine-tuning prompt_template. Should reference columns in the dataset using format <>. Defaults to None. - hyperparameters (Hyperparameters, optional): Hyperparameters for fine-tuning. Defaults to None. - **additional_info: Additional information to be saved with the FineTune. + name (Text): Name of the fine-tuning job. + dataset_list (List[Dataset]): List of datasets to use for fine-tuning. + model (Model): The base model to be fine-tuned. + cost (FinetuneCost): Cost information for the fine-tuning job. + id (Text, optional): ID of the job. Defaults to "". + description (Text, optional): Detailed description of the fine-tuning + purpose. Defaults to "". + supplier (Text, optional): Provider/creator of the fine-tuned model. + Defaults to "aiXplain". + version (Text, optional): Version identifier. Defaults to "1.0". + train_percentage (float, optional): Percentage of data to use for + training. Defaults to 100. + dev_percentage (float, optional): Percentage of data to use for + validation. Defaults to 0. 
+ prompt_template (Text, optional): Template for formatting training + examples. Use <> to reference dataset columns. + Defaults to None. + hyperparameters (Hyperparameters, optional): Configuration for the + fine-tuning process. Defaults to None. + **additional_info: Extra metadata for the fine-tuning job. """ super().__init__(id, name, description, supplier, version) self.model = model @@ -102,10 +115,18 @@ def __init__( self.aixplain_key = config.AIXPLAIN_API_KEY def start(self) -> Model: - """Start the Finetune job. + """Start the fine-tuning job on the backend. + + This method submits the fine-tuning configuration to the backend and initiates + the training process. It handles the creation of the training payload, + including dataset splits and hyperparameters. Returns: - Model: The model object representing the Finetune job. + Model: The model object representing the fine-tuning job. Returns None + if the job submission fails. + + Raises: + Exception: If there are errors in the API request or response handling. """ payload = {} try: diff --git a/aixplain/modules/finetune/cost.py b/aixplain/modules/finetune/cost.py index 2700f1cd..83168941 100644 --- a/aixplain/modules/finetune/cost.py +++ b/aixplain/modules/finetune/cost.py @@ -25,6 +25,18 @@ class FinetuneCost: + """A class representing the cost structure for a fine-tuning job. + + This class encapsulates the cost information for training, inference, and hosting + components of a fine-tuning job. It provides methods to convert the cost data + into a dictionary format for serialization. + + Attributes: + training (Dict): Dictionary containing training cost information. + inference (Dict): Dictionary containing inference cost information. + hosting (Dict): Dictionary containing hosting cost information. 
+ """ + def __init__( self, training: Dict, diff --git a/aixplain/modules/finetune/hyperparameters.py b/aixplain/modules/finetune/hyperparameters.py index 915a5b27..7a5cf5ca 100644 --- a/aixplain/modules/finetune/hyperparameters.py +++ b/aixplain/modules/finetune/hyperparameters.py @@ -5,6 +5,21 @@ class SchedulerType(Text, Enum): + """Enum representing different learning rate schedulers. + + This enum defines the possible learning rate schedulers that can be used + in the fine-tuning process. Each scheduler is represented by a string constant. + + Attributes: + LINEAR (Text): Linear learning rate scheduler. + COSINE (Text): Cosine learning rate scheduler. + COSINE_WITH_RESTARTS (Text): Cosine with restarts learning rate scheduler. + POLYNOMIAL (Text): Polynomial learning rate scheduler. + CONSTANT (Text): Constant learning rate scheduler. + CONSTANT_WITH_WARMUP (Text): Constant with warmup learning rate scheduler. + INVERSE_SQRT (Text): Inverse square root learning rate scheduler. + REDUCE_ON_PLATEAU (Text): Reduce learning rate on plateau learning rate scheduler. + """ LINEAR = "linear" COSINE = "cosine" COSINE_WITH_RESTARTS = "cosine_with_restarts" @@ -23,6 +38,22 @@ class SchedulerType(Text, Enum): @dataclass_json @dataclass class Hyperparameters(object): + """Configuration for the fine-tuning process. + + This class encapsulates the hyperparameters for training a model using a + fine-tuning approach. It includes settings for epochs, batch sizes, learning + rates, sequence lengths, and learning rate schedulers. + + Attributes: + epochs (int): Number of training epochs. + train_batch_size (int): Batch size for training. + eval_batch_size (int): Batch size for evaluation. + learning_rate (float): Learning rate for training. + max_seq_length (int): Maximum sequence length for model inputs. + warmup_ratio (float): Warmup ratio for learning rate scheduler. + warmup_steps (int): Number of warmup steps for learning rate scheduler. 
+ lr_scheduler_type (SchedulerType): Type of learning rate scheduler. + """ epochs: int = 1 train_batch_size: int = 4 eval_batch_size: int = 4 @@ -33,6 +64,16 @@ class Hyperparameters(object): lr_scheduler_type: SchedulerType = SchedulerType.LINEAR def __post_init__(self): + """Post-initialization validation for the hyperparameters. + + This method performs validation checks on the hyperparameters after + initialization. It ensures that the provided values are of the correct + types and within the allowed ranges. + + Raises: + TypeError: If the provided values are not of the correct types. + ValueError: If the provided values are outside the allowed ranges. + """ if not isinstance(self.epochs, int): raise TypeError("epochs should be of type int") diff --git a/aixplain/modules/finetune/status.py b/aixplain/modules/finetune/status.py index 5f27aa72..38dd0451 100644 --- a/aixplain/modules/finetune/status.py +++ b/aixplain/modules/finetune/status.py @@ -30,6 +30,18 @@ @dataclass_json @dataclass class FinetuneStatus(object): + """Status information for a fine-tuning job. + + This class encapsulates the status of a fine-tuning job, including the overall + status of the job, the status of the model, and various training metrics. + + Attributes: + status (AssetStatus): Overall status of the fine-tuning job. + model_status (AssetStatus): Status of the fine-tuned model. + epoch (Optional[float]): Current training epoch. + training_loss (Optional[float]): Training loss at the current epoch. + validation_loss (Optional[float]): Validation loss at the current epoch. + """ status: "AssetStatus" model_status: "AssetStatus" epoch: Optional[float] = None diff --git a/aixplain/modules/metadata.py b/aixplain/modules/metadata.py index 07007ebe..23a7fd44 100644 --- a/aixplain/modules/metadata.py +++ b/aixplain/modules/metadata.py @@ -31,6 +31,26 @@ class MetaData: + """A class representing metadata for data in the aiXplain platform. 
+ + This class provides functionality for managing metadata, which is used to store + information about data in the platform. It supports various data types, languages, + and storage formats. + + Attributes: + name (Text): Name of the data. + dtype (DataType): Type of data. + storage_type (StorageType): Storage type of the data. + data_column (Optional[Text]): Column index/name where the data is on a structured file. + start_column (Optional[Text]): Column index/name where the start indexes is on a structured file. + end_column (Optional[Text]): Column index/name where the end indexes is on a structured file. + privacy (Optional[Privacy]): Privacy of data. + file_extension (Optional[FileType]): File extension (e.g. CSV, TXT, etc.). + languages (List[Language]): List of languages which the data consists of. + dsubtype (DataSubtype): Data subtype (e.g., age, topic, race, split, etc.), used in datasets metadata. + id (Optional[Text]): Data ID. + kwargs (dict): Additional keyword arguments for extensibility. + """ def __init__( self, name: Text, @@ -46,11 +66,7 @@ def __init__( id: Optional[Text] = None, **kwargs ) -> None: - """MetaData Class - - Description: - This class is used to stored the meta-information of the Data Class. - It may be used to describe Data during the onboarding process of a corpus or dataset. + """Initialize a new MetaData instance. Args: name (Text): Data Name diff --git a/aixplain/modules/metric.py b/aixplain/modules/metric.py index 86c08a08..9386f2bb 100644 --- a/aixplain/modules/metric.py +++ b/aixplain/modules/metric.py @@ -26,16 +26,25 @@ class Metric(Asset): - """Represents a metric to be computed on one or more peices of data. It is usually linked to a machine learning task. + """A class representing a metric for evaluating machine learning model outputs. - Attributes: - id (Text): ID of the Metric - name (Text): Name of the Metric - description (Text): Description of the Metric - supplier (Text, optional): author of the Metric. 
Defaults to "aiXplain". - version (Text, optional): Metric version. Defaults to "1.0". - additional_info: Any additional Metric info to be saved + This class extends Asset to provide functionality for computing evaluation metrics + on one or more pieces of data. Each metric is typically associated with a specific + machine learning task and can require different inputs (e.g., reference text for + translation metrics). + Attributes: + id (Text): ID of the metric. + name (Text): Name of the metric. + supplier (Text): Author/provider of the metric. + is_reference_required (bool): Whether the metric requires reference data. + is_source_required (bool): Whether the metric requires source data. + cost (float): Cost per metric computation. + function (Text): The function identifier for this metric. + normalization_options (list): List of available normalization options. + description (Text): Description of the metric. + version (Text): Version of the metric implementation. + additional_info (dict): Additional metric-specific information. """ def __init__( @@ -50,17 +59,19 @@ def __init__( normalization_options: list = [], **additional_info, ) -> None: - """Create a Metric with the necessary information + """Initialize a new Metric instance. Args: - id (Text): ID of the Metric - name (Text): Name of the Metric - supplier (Text): author of the Metric - is_reference_required (bool): does the metric use reference - is_source_required (bool): does the metric use source - cost (float): price of the metric - normalization_options(list, []) - **additional_info: Any additional Metric info to be saved + id (Text): ID of the metric. + name (Text): Name of the metric. + supplier (Text): Author/provider of the metric. + is_reference_required (bool): Whether the metric requires reference data for computation. + is_source_required (bool): Whether the metric requires source data for computation. + cost (float): Cost per metric computation. 
+ function (Text): The function identifier for this metric. + normalization_options (list, optional): List of available normalization options. + Defaults to empty list. + **additional_info: Additional metric-specific information to be stored. """ super().__init__(id, name, description="", supplier=supplier, version="1.0", cost=cost) self.is_source_required = is_source_required @@ -70,13 +81,22 @@ def __init__( self.additional_info = additional_info def __repr__(self) -> str: + """Return a string representation of the Metric instance. + + Returns: + str: A string in the format "<Metric: {self.name}>". + """ return f"<Metric: {self.name}>" - def add_normalization_options(self, normalization_options: List[str]): - """Add a given set of normalization options to be used while benchmarking + def add_normalization_options(self, normalization_options: List[str]) -> None: + """Add normalization options to be used during metric computation. + + This method appends new normalization options to the existing list of options. + These options can be used to normalize inputs or outputs during benchmarking. Args: - normalization_options (List[str]): List of normalization options to be added + normalization_options (List[str]): List of normalization options to add. + Each option should be a valid normalization identifier. """ self.normalization_options.append(normalization_options) @@ -85,13 +105,29 @@ def run( self, hypothesis: Optional[Union[str, List[str]]] = None, source: Optional[Union[str, List[str]]] = None, reference: Optional[Union[str, List[str]]] = None, - ): - """Run the metric to calculate the scores. + ) -> dict: + """Run the metric to calculate scores for the provided inputs. + + This method computes metric scores based on the provided hypothesis, and optionally + source and reference data. The inputs can be either single strings or lists of strings. Args: - hypothesis (Optional[Union[str, List[str]]], optional): Can give a single hypothesis or a list of hypothesis for metric calculation. Defaults to None. 
- source (Optional[Union[str, List[str]]], optional): Can give a single source or a list of sources for metric calculation. Defaults to None. - reference (Optional[Union[str, List[str]]], optional): Can give a single reference or a list of references for metric calculation. Defaults to None. + hypothesis (Optional[Union[str, List[str]]], optional): The hypothesis/output to evaluate. + Can be a single string or a list of strings. Defaults to None. + source (Optional[Union[str, List[str]]], optional): The source data for evaluation. + Only used if is_source_required is True. Can be a single string or a list + of strings. Defaults to None. + reference (Optional[Union[str, List[str]]], optional): The reference data for evaluation. + Only used if is_reference_required is True. Can be a single string or a list + of strings. Defaults to None. + + Returns: + dict: A dictionary containing the computed metric scores and any additional + computation metadata. + + Note: + The method automatically handles conversion of single strings to lists and + proper formatting of references for multi-reference scenarios. """ from aixplain.factories.model_factory import ModelFactory diff --git a/aixplain/modules/model/__init__.py b/aixplain/modules/model/__init__.py index 5c96ab63..8b840cfc 100644 --- a/aixplain/modules/model/__init__.py +++ b/aixplain/modules/model/__init__.py @@ -39,26 +39,33 @@ class Model(Asset): - """This is ready-to-use AI model. This model can be run in both synchronous and asynchronous manner. + """A ready-to-use AI model that can be executed synchronously or asynchronously. + + This class represents a deployable AI model in the aiXplain platform. It provides + functionality for model execution, parameter management, and status tracking. + Models can be run with both synchronous and asynchronous APIs, and some models + support streaming responses. 
Attributes: - id (Text): ID of the Model - name (Text): Name of the Model - description (Text, optional): description of the model. Defaults to "". - api_key (Text, optional): API key of the Model. Defaults to None. - url (Text, optional): endpoint of the model. Defaults to config.MODELS_RUN_URL. - supplier (Union[Dict, Text, Supplier, int], optional): supplier of the asset. Defaults to "aiXplain". - version (Text, optional): version of the model. Defaults to "1.0". - function (Function, optional): model AI function. Defaults to None. - url (str): URL to run the model. - backend_url (str): URL of the backend. - pricing (Dict, optional): model price. Defaults to None. - **additional_info: Any additional Model info to be saved - input_params (ModelParameters, optional): input parameters for the function. - output_params (Dict, optional): output parameters for the function. - model_params (ModelParameters, optional): parameters for the function. - supports_streaming (bool, optional): whether the model supports streaming. Defaults to False. - function_type (FunctionType, optional): type of the function. Defaults to FunctionType.AI. + id (Text): ID of the model. + name (Text): Name of the model. + description (Text): Detailed description of the model's functionality. + api_key (Text): Authentication key for API access. + url (Text): Endpoint URL for model execution. + supplier (Union[Dict, Text, Supplier, int]): Provider/creator of the model. + version (Text): Version identifier of the model. + function (Function): The AI function this model performs. + backend_url (str): Base URL for the backend API. + cost (Dict): Pricing information for model usage. + input_params (ModelParameters): Parameters accepted by the model. + output_params (Dict): Description of model outputs. + model_params (ModelParameters): Configuration parameters for model behavior. + supports_streaming (bool): Whether the model supports streaming responses. 
+ function_type (FunctionType): Category of function (AI, UTILITY, etc.). + is_subscribed (bool): Whether the user has an active subscription. + created_at (datetime): When the model was created. + status (AssetStatus): Current status of the model. + additional_info (dict): Additional model metadata. """ def __init__( @@ -81,25 +88,38 @@ def __init__( function_type: Optional[FunctionType] = FunctionType.AI, **additional_info, ) -> None: - """Model Init + """Initialize a new Model instance. Args: - id (Text): ID of the Model - name (Text): Name of the Model - description (Text, optional): description of the model. Defaults to "". - api_key (Text, optional): API key of the Model. Defaults to None. - supplier (Union[Dict, Text, Supplier, int], optional): supplier of the asset. Defaults to "aiXplain". - version (Text, optional): version of the model. Defaults to "1.0". - function (Text, optional): model AI function. Defaults to None. - is_subscribed (bool, optional): Is the user subscribed. Defaults to False. - cost (Dict, optional): model price. Defaults to None. - input_params (Dict, optional): input parameters for the function. - output_params (Dict, optional): output parameters for the function. - model_params (Dict, optional): parameters for the function. - supports_streaming (bool, optional): whether the model supports streaming. Defaults to False. - status (AssetStatus, optional): status of the model. Defaults to None. - function_type (FunctionType, optional): type of the function. Defaults to FunctionType.AI. - **additional_info: Any additional Model info to be saved + id (Text): ID of the Model. + name (Text, optional): Name of the Model. Defaults to "". + description (Text, optional): Description of the Model. Defaults to "". + api_key (Text, optional): Authentication key for API access. + Defaults to config.TEAM_API_KEY. + supplier (Union[Dict, Text, Supplier, int], optional): Provider/creator + of the model. Defaults to "aiXplain". 
+ version (Text, optional): Version identifier of the model. Defaults to None. + function (Function, optional): The AI function this model performs. + Defaults to None. + is_subscribed (bool, optional): Whether the user has an active + subscription. Defaults to False. + cost (Dict, optional): Pricing information for model usage. + Defaults to None. + created_at (Optional[datetime], optional): When the model was created. + Defaults to None. + input_params (Dict, optional): Parameters accepted by the model. + Defaults to None. + output_params (Dict, optional): Description of model outputs. + Defaults to None. + model_params (Dict, optional): Configuration parameters for model + behavior. Defaults to None. + supports_streaming (bool, optional): Whether the model supports streaming + responses. Defaults to False. + status (AssetStatus, optional): Current status of the model. + Defaults to AssetStatus.ONBOARDED. + function_type (FunctionType, optional): Category of function. + Defaults to FunctionType.AI. + **additional_info: Additional model metadata. """ super().__init__(id, name, description, supplier, version, cost=cost) self.api_key = api_key @@ -122,10 +142,20 @@ def __init__( self.status = status def to_dict(self) -> Dict: - """Get the model info as a Dictionary + """Convert the model instance to a dictionary representation. 
Returns: - Dict: Model Information + Dict: A dictionary containing the model's configuration with keys: + - id: Unique identifier + - name: Model name + - description: Model description + - supplier: Model provider + - additional_info: Extra metadata (excluding None/empty values) + - input_params: Input parameter configuration + - output_params: Output parameter configuration + - model_params: Model behavior parameters + - function: AI function type + - status: Current model status """ clean_additional_info = {k: v for k, v in self.additional_info.items() if v not in [None, [], {}]} return { @@ -141,12 +171,23 @@ def to_dict(self) -> Dict: "status": self.status, } - def get_parameters(self) -> ModelParameters: + def get_parameters(self) -> Optional[ModelParameters]: + """Get the model's configuration parameters. + + Returns: + Optional[ModelParameters]: The model's parameter configuration if set, + None otherwise. + """ if self.model_params: return self.model_params return None - def __repr__(self): + def __repr__(self) -> str: + """Return a string representation of the model. + + Returns: + str: A string in the format "Model: <name> by <supplier> (id=<id>)". + """ try: return f"Model: {self.name} by {self.supplier['name']} (id={self.id})" except Exception: @@ -159,16 +200,27 @@ def sync_poll( wait_time: float = 0.5, timeout: float = 300, ) -> ModelResponse: - """Keeps polling the platform to check whether an asynchronous call is done. + """Poll the platform until an asynchronous operation completes or times out. + + This method repeatedly checks the status of an asynchronous operation, + implementing exponential backoff for the polling interval. Args: - poll_url (Text): polling URL - name (Text, optional): ID given to a call. Defaults to "model_process". - wait_time (float, optional): wait time in seconds between polling calls. Defaults to 0.5. - timeout (float, optional): total polling time. Defaults to 300. + poll_url (Text): URL to poll for operation status.
+ name (Text, optional): Identifier for the operation for logging. + Defaults to "model_process". + wait_time (float, optional): Initial wait time in seconds between polls. + Will increase exponentially up to 60 seconds. Defaults to 0.5. + timeout (float, optional): Maximum total time to poll in seconds. + Defaults to 300. Returns: - Dict: response obtained by polling call + ModelResponse: The final response from the operation. If polling times + out or fails, returns a failed response with appropriate error message. + + Note: + The minimum wait time between polls is 0.2 seconds. The wait time + increases by 10% after each poll up to a maximum of 60 seconds. """ logging.info(f"Polling for Model: Start polling for {name}") start, end = time.time(), time.time() @@ -208,14 +260,20 @@ def sync_poll( return response_body def poll(self, poll_url: Text, name: Text = "model_process") -> ModelResponse: - """Poll the platform to check whether an asynchronous call is done. + """Make a single poll request to check operation status. Args: - poll_url (Text): polling - name (Text, optional): ID given to a call. Defaults to "model_process". + poll_url (Text): URL to poll for operation status. + name (Text, optional): Identifier for the operation for logging. + Defaults to "model_process". Returns: - Dict: response obtained by polling call + ModelResponse: The current status of the operation. Contains completion + status, any results or errors, and usage statistics. + + Note: + This is a low-level method used by sync_poll. Most users should use + sync_poll instead for complete operation handling. """ headers = {"x-api-key": self.api_key, "Content-Type": "application/json"} r = _request_with_retry("get", poll_url, headers=headers) @@ -256,6 +314,19 @@ def run_stream( data: Union[Text, Dict], parameters: Optional[Dict] = None, ) -> ModelResponseStreamer: + """Execute the model with streaming response. + + Args: + data (Union[Text, Dict]): The input data for the model. 
+ parameters (Optional[Dict], optional): Additional parameters for model + execution. Defaults to None. + + Returns: + ModelResponseStreamer: A streamer object that yields response chunks. + + Raises: + AssertionError: If the model doesn't support streaming. + """ assert self.supports_streaming, f"Model '{self.name} ({self.id})' does not support streaming" payload = build_payload(data=data, parameters=parameters, stream=True) url = f"{self.url}/{self.id}".replace("api/v1/execute", "api/v2/execute") @@ -273,17 +344,32 @@ def run( wait_time: float = 0.5, stream: bool = False, ) -> Union[ModelResponse, ModelResponseStreamer]: - """Runs a model call. + """Execute the model and wait for results. + + This method handles both synchronous and streaming execution modes. For + asynchronous operations, it polls until completion or timeout. Args: - data (Union[Text, Dict]): link to the input data - name (Text, optional): ID given to a call. Defaults to "model_process". - timeout (float, optional): total polling time. Defaults to 300. - parameters (Dict, optional): optional parameters to the model. Defaults to None. - wait_time (float, optional): wait time in seconds between polling calls. Defaults to 0.5. - stream (bool, optional): whether the model supports streaming. Defaults to False. + data (Union[Text, Dict]): The input data for the model. + name (Text, optional): Identifier for the operation for logging. + Defaults to "model_process". + timeout (float, optional): Maximum time to wait for completion in seconds. + Defaults to 300. + parameters (Dict, optional): Additional parameters for model execution. + Defaults to None. + wait_time (float, optional): Initial wait time between polls in seconds. + Defaults to 0.5. + stream (bool, optional): Whether to use streaming mode. Requires model + support. Defaults to False. + Returns: - Union[ModelResponse, ModelStreamer]: parsed output from model + Union[ModelResponse, ModelResponseStreamer]: The model's response. 
For + streaming mode, returns a streamer object. For regular mode, + returns a response object with results or error information. + + Note: + If the model execution becomes asynchronous, this method will poll + for completion using sync_poll with the specified timeout and wait_time. """ if stream: return self.run_stream(data=data, parameters=parameters) @@ -325,15 +411,24 @@ def run_async( name: Text = "model_process", parameters: Optional[Dict] = None, ) -> ModelResponse: - """Runs asynchronously a model call. + """Start asynchronous model execution. + + This method initiates model execution but doesn't wait for completion. + Use sync_poll to check the operation status later. Args: - data (Union[Text, Dict]): link to the input data - name (Text, optional): ID given to a call. Defaults to "model_process". - parameters (Dict, optional): optional parameters to the model. Defaults to None. + data (Union[Text, Dict]): The input data for the model. + name (Text, optional): Identifier for the operation for logging. + Defaults to "model_process". + parameters (Dict, optional): Additional parameters for model execution. + Defaults to None. Returns: - dict: polling URL in response + ModelResponse: Initial response containing: + - status: Current operation status + - url: URL for polling operation status + - error_message: Any immediate errors + - other response metadata """ url = f"{self.url}/{self.id}" payload = build_payload(data=data, parameters=parameters) @@ -417,7 +512,14 @@ def check_finetune_status(self, after_epoch: Optional[int] = None): logging.exception(error_message) def delete(self) -> None: - """Delete Model service""" + """Delete this model from the aiXplain platform. + + This method attempts to delete the model from the platform. It will fail + if the user doesn't have appropriate permissions. + + Raises: + Exception: If deletion fails or if the user doesn't have permission. 
+ """ try: url = urljoin(self.backend_url, f"sdk/models/{self.id}") headers = { @@ -434,17 +536,42 @@ def delete(self) -> None: raise Exception(f"{message}") def add_additional_info_for_benchmark(self, display_name: str, configuration: Dict) -> None: - """Add additional info for benchmark - + """Add benchmark-specific information to the model. + + This method updates the model's additional_info with benchmark-related + metadata. + Args: - display_name (str): display name of the model - configuration (Dict): configuration of the model + display_name (str): Name for display in benchmarks. + configuration (Dict): Model configuration settings for benchmarking. """ self.additional_info["displayName"] = display_name self.additional_info["configuration"] = configuration @classmethod def from_dict(cls, data: Dict) -> "Model": + """Create a Model instance from a dictionary representation. + + Args: + data (Dict): Dictionary containing model configuration with keys: + - id: Model identifier + - name: Model name + - description: Model description + - api_key: API key for authentication + - supplier: Model provider information + - version: Model version + - function: AI function type + - is_subscribed: Subscription status + - cost: Pricing information + - created_at: Creation timestamp (ISO format) + - input_params: Input parameter configuration + - output_params: Output parameter configuration + - model_params: Model behavior parameters + - additional_info: Extra metadata + + Returns: + Model: A new Model instance populated with the dictionary data. + """ return cls( id=data.get("id", ""), name=data.get("name", ""), diff --git a/aixplain/modules/model/connection.py b/aixplain/modules/model/connection.py index 3e73574d..9e4ee1cb 100644 --- a/aixplain/modules/model/connection.py +++ b/aixplain/modules/model/connection.py @@ -5,18 +5,43 @@ class ConnectAction: + """A class representing an action that can be performed by a connection. 
+ + This class defines the structure of a connection action with its name, description, + code, and input parameters. + + Attributes: + name (Text): The display name of the action. + description (Text): A detailed description of what the action does. + code (Optional[Text]): The internal code/identifier for the action. + inputs (Optional[Dict]): The input parameters required by the action. + """ + name: Text description: Text code: Optional[Text] = None inputs: Optional[Dict] = None def __init__(self, name: Text, description: Text, code: Optional[Text] = None, inputs: Optional[Dict] = None): + """Initialize a new ConnectAction instance. + + Args: + name (Text): The display name of the action. + description (Text): A detailed description of what the action does. + code (Optional[Text], optional): The internal code/identifier for the action. Defaults to None. + inputs (Optional[Dict], optional): The input parameters required by the action. Defaults to None. + """ self.name = name self.description = description self.code = code self.inputs = inputs def __repr__(self): + """Return a string representation of the ConnectAction instance. + + Returns: + str: A string in the format "Action(code=<code>, name=<name>)". + """ return f"Action(code={self.code}, name={self.name})" @@ -38,20 +63,20 @@ def __init__( function_type: Optional[FunctionType] = FunctionType.CONNECTION, **additional_info, ) -> None: - """Connection Init + """Initialize a new ConnectionTool instance. Args: - id (Text): ID of the Model - name (Text): Name of the Model - description (Text, optional): description of the model. Defaults to "". - api_key (Text, optional): API key of the Model. Defaults to None. - supplier (Union[Dict, Text, Supplier, int], optional): supplier of the asset. Defaults to "aiXplain". - version (Text, optional): version of the model. Defaults to "1.0". - function (Function, optional): model AI function. Defaults to None.
+ id (Text): ID of the Connection + name (Text): Name of the Connection + description (Text, optional): Description of the Connection. Defaults to "". + api_key (Text, optional): API key of the Connection. Defaults to None. + supplier (Union[Dict, Text, Supplier, int], optional): Supplier of the Connection. Defaults to "aiXplain". + version (Text, optional): Version of the Connection. Defaults to "1.0". + function (Function, optional): Function of the Connection. Defaults to None. is_subscribed (bool, optional): Is the user subscribed. Defaults to False. - cost (Dict, optional): model price. Defaults to None. - scope (Text, optional): action scope of the connection. Defaults to None. - **additional_info: Any additional Model info to be saved + cost (Dict, optional): Cost of the Connection. Defaults to None. + function_type (FunctionType, optional): Type of the Connection. Defaults to FunctionType.CONNECTION. + **additional_info: Any additional Connection info to be saved """ assert ( function_type == FunctionType.CONNECTION or function_type == FunctionType.MCP_CONNECTION @@ -75,6 +100,14 @@ def __init__( self.action_scope = None def _get_actions(self): + """Retrieve the list of available actions for this connection. + + Returns: + List[ConnectAction]: A list of available actions for this connection. + + Raises: + Exception: If the actions cannot be retrieved from the server. + """ response = super().run({"action": "LIST_ACTIONS", "data": " "}) if response.status == ResponseStatus.SUCCESS: return [ @@ -86,6 +119,18 @@ def _get_actions(self): ) def get_action_inputs(self, action: Union[ConnectAction, Text]): + """Retrieve the input parameters required for a specific action. + + Args: + action (Union[ConnectAction, Text]): The action to get inputs for, either as a ConnectAction object + or as a string code. + + Returns: + Dict: A dictionary containing the input parameters for the action. + + Raises: + Exception: If the inputs cannot be retrieved from the server. 
+ """ if action.inputs: return action.inputs @@ -108,11 +153,31 @@ def get_action_inputs(self, action: Union[ConnectAction, Text]): ) def run(self, action: Union[ConnectAction, Text], inputs: Dict): + """Execute a specific action with the provided inputs. + + Args: + action (Union[ConnectAction, Text]): The action to execute, either as a ConnectAction object + or as a string code. + inputs (Dict): The input parameters for the action. + + Returns: + Response: The response from the server after executing the action. + """ if isinstance(action, ConnectAction): action = action.code return super().run({"action": action, "data": inputs}) def get_parameters(self) -> List[Dict]: + """Get the parameters for all actions in the current action scope. + + Returns: + List[Dict]: A list of dictionaries containing the parameters for each action + in the action scope. Each dictionary contains the action's code, name, + description, and input parameters. + + Raises: + AssertionError: If the action scope is not set or is empty. + """ assert ( self.action_scope is not None and len(self.action_scope) > 0 ), f"Please set the scope of actions for the connection '{self.id}'." @@ -128,4 +193,9 @@ def get_parameters(self) -> List[Dict]: return response def __repr__(self): + """Return a string representation of the ConnectionTool instance. + + Returns: + str: A string in the format "ConnectionTool(id=<id>, name=<name>)". + """ return f"ConnectionTool(id={self.id}, name={self.name})" diff --git a/aixplain/modules/model/index_model.py b/aixplain/modules/model/index_model.py index 31c91531..b9420f52 100644 --- a/aixplain/modules/model/index_model.py +++ b/aixplain/modules/model/index_model.py @@ -14,6 +14,21 @@ class IndexFilterOperator(Enum): + """Enumeration of operators available for filtering index records. + + This enum defines the comparison operators that can be used when creating + filters for searching and retrieving records from an index.
+ + Attributes: + EQUALS (str): Equality operator ("==") + NOT_EQUALS (str): Inequality operator ("!=") + CONTAINS (str): Membership test operator ("in") + NOT_CONTAINS (str): Negative membership test operator ("not in") + GREATER_THAN (str): Greater than operator (">") + LESS_THAN (str): Less than operator ("<") + GREATER_THAN_OR_EQUALS (str): Greater than or equal to operator (">=") + LESS_THAN_OR_EQUALS (str): Less than or equal to operator ("<=") + """ EQUALS = "==" NOT_EQUALS = "!=" CONTAINS = "in" @@ -25,16 +40,40 @@ class IndexFilterOperator(Enum): class IndexFilter: + """A class representing a filter for querying index records. + + This class defines a filter that can be used to search or retrieve records from an index + based on specific field values and comparison operators. + + Attributes: + field (str): The name of the field to filter on. + value (str): The value to compare against. + operator (Union[IndexFilterOperator, str]): The comparison operator to use. + """ + field: str value: str operator: Union[IndexFilterOperator, str] def __init__(self, field: str, value: str, operator: Union[IndexFilterOperator, str]): + """Initialize a new IndexFilter instance. + + Args: + field (str): The name of the field to filter on. + value (str): The value to compare against. + operator (Union[IndexFilterOperator, str]): The comparison operator to use. + """ self.field = field self.value = value self.operator = operator def to_dict(self): + """Convert the filter to a dictionary representation. + + Returns: + dict: A dictionary containing the filter's field, value, and operator. + The operator is converted to its string value if it's an IndexFilterOperator. + """ return { "field": self.field, "value": self.value, @@ -43,6 +82,19 @@ def to_dict(self): class Splitter: + """A class for configuring how documents should be split during indexing. 
+ + This class provides options for splitting documents into smaller chunks before + they are indexed, which can be useful for large documents or for specific + search requirements. + + Attributes: + split (bool): Whether to split the documents or not. + split_by (SplittingOptions): The method to use for splitting (e.g., by word, sentence). + split_length (int): The length of each split chunk. + split_overlap (int): The number of overlapping units between consecutive chunks. + """ + def __init__( self, split: bool = False, @@ -50,6 +102,16 @@ def __init__( split_length: int = 1, split_overlap: int = 0, ): + """Initialize a new Splitter instance. + + Args: + split (bool, optional): Whether to split the documents. Defaults to False. + split_by (SplittingOptions, optional): The method to use for splitting. + Defaults to SplittingOptions.WORD. + split_length (int, optional): The length of each split chunk. Defaults to 1. + split_overlap (int, optional): The number of overlapping units between + consecutive chunks. Defaults to 0. + """ self.split = split self.split_by = split_by self.split_length = split_length @@ -72,20 +134,24 @@ def __init__( function_type: Optional[FunctionType] = FunctionType.SEARCH, **additional_info, ) -> None: - """Index Init + """Initialize a new IndexModel instance. Args: - id (Text): ID of the Model - name (Text): Name of the Model - description (Text, optional): description of the model. Defaults to "". - api_key (Text, optional): API key of the Model. Defaults to None. - supplier (Union[Dict, Text, Supplier, int], optional): supplier of the asset. Defaults to "aiXplain". - version (Text, optional): version of the model. Defaults to "1.0". - function (Function, optional): model AI function. Defaults to None. - is_subscribed (bool, optional): Is the user subscribed. Defaults to False. - cost (Dict, optional): model price. Defaults to None. - embedding_model (Union[EmbeddingModel, str], optional): embedding model. Defaults to None. 
- **additional_info: Any additional Model info to be saved + id (Text): ID of the Index Model. + name (Text): Name of the Index Model. + description (Text, optional): Description of the Index Model. Defaults to "". + api_key (Text, optional): API key of the Index Model. Defaults to None. + supplier (Union[Dict, Text, Supplier, int], optional): Supplier of the Index Model. Defaults to "aiXplain". + version (Text, optional): Version of the Index Model. Defaults to "1.0". + function (Function, optional): Function of the Index Model. Must be Function.SEARCH. + is_subscribed (bool, optional): Whether the user is subscribed. Defaults to False. + cost (Dict, optional): Cost of the Index Model. Defaults to None. + embedding_model (Union[EmbeddingModel, str], optional): Model used for embedding documents. Defaults to None. + function_type (FunctionType, optional): Type of the function. Defaults to FunctionType.SEARCH. + **additional_info: Any additional Index Model info to be saved. + + Raises: + AssertionError: If function is not Function.SEARCH. """ assert function == Function.SEARCH, "Index only supports search function" super().__init__( @@ -117,6 +183,15 @@ def __init__( self.embedding_size = None def to_dict(self) -> Dict: + """Convert the IndexModel instance to a dictionary representation. + + Returns: + Dict: A dictionary containing the model's attributes, including: + - All attributes from the parent Model class + - embedding_model: The model used for embedding documents + - embedding_size: The size of the embeddings produced + - collection_type: The type of collection derived from the version + """ data = super().to_dict() data["embedding_model"] = self.embedding_model data["embedding_size"] = self.embedding_size @@ -198,6 +273,18 @@ def upsert(self, documents: List[Record], splitter: Optional[Splitter] = None) - raise Exception(f"Failed to upsert documents: {response.error_message}") def count(self) -> float: + """Get the total number of documents in the index. 
+ + Returns: + float: The number of documents in the index. + + Raises: + Exception: If the count operation fails. + + Example: + >>> index_model.count() + 42 + """ data = {"action": "count", "data": ""} response = self.run(data=data) if response.status == "SUCCESS": diff --git a/aixplain/modules/model/integration.py b/aixplain/modules/model/integration.py index c05798e0..24938d2c 100644 --- a/aixplain/modules/model/integration.py +++ b/aixplain/modules/model/integration.py @@ -8,6 +8,19 @@ import json class AuthenticationSchema(Enum): + """Enumeration of supported authentication schemes for integrations. + + This enum defines the various authentication methods that can be used + when connecting to external services through integrations. + + Attributes: + BEARER_TOKEN (str): Bearer token authentication scheme. + OAUTH1 (str): OAuth 1.0 authentication scheme. + OAUTH2 (str): OAuth 2.0 authentication scheme. + API_KEY (str): API key authentication scheme. + BASIC (str): Basic authentication scheme (username/password). + NO_AUTH (str): No authentication required. + """ BEARER_TOKEN = "BEARER_TOKEN" OAUTH1 = "OAUTH1" OAUTH2 = "OAUTH2" @@ -16,10 +29,37 @@ class AuthenticationSchema(Enum): NO_AUTH = "NO_AUTH" class BaseAuthenticationParams(BaseModel): + """Base model for authentication parameters used in integrations. + + This class defines the common parameters that are used across different + authentication schemes when connecting to external services. + + Attributes: + name (Optional[Text]): Optional name for the connection. Defaults to None. + connector_id (Optional[Text]): Optional ID of the connector. Defaults to None. + """ name: Optional[Text] = None connector_id: Optional[Text] = None def build_connector_params(**kwargs) -> BaseAuthenticationParams: + """Build authentication parameters for a connector from keyword arguments. 
+ + This function creates a BaseAuthenticationParams instance from the provided + keyword arguments, extracting the name and connector_id if present. + + Args: + **kwargs: Arbitrary keyword arguments. Supported keys: + - name (Optional[Text]): Name for the connection + - connector_id (Optional[Text]): ID of the connector + + Returns: + BaseAuthenticationParams: An instance containing the extracted parameters. + + Example: + >>> params = build_connector_params(name="My Connection", connector_id="123") + >>> print(params.name) + 'My Connection' + """ name = kwargs.get("name") connector_id = kwargs.get("connector_id") return BaseAuthenticationParams(name=name, connector_id=connector_id) @@ -40,19 +80,24 @@ def __init__( function_type: Optional[FunctionType] = FunctionType.INTEGRATION, **additional_info, ) -> None: - """Integration Init + """Initialize a new Integration instance. Args: - id (Text): ID of the Model - name (Text): Name of the Model - description (Text, optional): description of the model. Defaults to "". - api_key (Text, optional): API key of the Model. Defaults to None. - supplier (Union[Dict, Text, Supplier, int], optional): supplier of the asset. Defaults to "aiXplain". - version (Text, optional): version of the model. Defaults to "1.0". - function (Function, optional): model AI function. Defaults to None. - is_subscribed (bool, optional): Is the user subscribed. Defaults to False. - cost (Dict, optional): model price. Defaults to None. - **additional_info: Any additional Model info to be saved + id (Text): ID of the Integration. + name (Text): Name of the Integration. + description (Text, optional): Description of the Integration. Defaults to "". + api_key (Text, optional): API key for the Integration. Defaults to None. + supplier (Union[Dict, Text, Supplier, int], optional): Supplier of the Integration. Defaults to "aiXplain". + version (Text, optional): Version of the Integration. Defaults to "1.0". 
+ function (Function, optional): Function of the Integration. Defaults to None. + is_subscribed (bool, optional): Whether the user is subscribed. Defaults to False. + cost (Dict, optional): Cost of the Integration. Defaults to None. + function_type (FunctionType, optional): Type of the function. Must be FunctionType.INTEGRATION. + Defaults to FunctionType.INTEGRATION. + **additional_info: Any additional Integration info to be saved. + + Raises: + AssertionError: If function_type is not FunctionType.INTEGRATION. """ assert function_type == FunctionType.INTEGRATION, "Integration only supports connector function" super().__init__( @@ -75,25 +120,57 @@ def __init__( def connect(self, authentication_schema: AuthenticationSchema, args: Optional[BaseAuthenticationParams] = None, data: Optional[Dict] = None, **kwargs) -> ModelResponse: - """Connect to the integration + """Connect to the integration using the specified authentication scheme. - Examples: - - For Bearer Token Authentication: - >>> integration.connect(BearerAuthenticationSchema(name="My Connection", token="1234567890")) - >>> integration.connect(BearerAuthenticationSchema(token="1234567890")) - >>> integration.connect(token="1234567890") - - For OAuth Authentication: - >>> integration.connect(OAuthAuthenticationSchema(name="My Connection", client_id="1234567890", client_secret="1234567890")) - >>> integration.connect(OAuthAuthenticationSchema(client_id="1234567890", client_secret="1234567890")) - >>> integration.connect(client_id="1234567890", client_secret="1234567890") - - For OAuth2 Authentication: - >>> integration.connect(OAuth2AuthenticationSchema(name="My Connection")) - >>> integration.connect() - Make sure to click on the redirect url to complete the connection. + This method establishes a connection to the integration service using the provided + authentication method and credentials. The required parameters vary depending on + the authentication scheme being used. 
+ + Args: + authentication_schema (AuthenticationSchema): The authentication scheme to use + (e.g., BEARER_TOKEN, OAUTH1, OAUTH2, API_KEY, BASIC, NO_AUTH). + args (Optional[BaseAuthenticationParams], optional): Common connection parameters. + If not provided, will be built from kwargs. Defaults to None. + data (Optional[Dict], optional): Authentication-specific parameters required by + the chosen authentication scheme. Defaults to None. + **kwargs: Additional keyword arguments used to build BaseAuthenticationParams + if args is not provided. Supported keys: + - name (str): Name for the connection + - connector_id (str): ID of the connector Returns: - id: Connection ID (retrieve it with ModelFactory.get(id)) - redirectUrl: Redirect URL to complete the connection (only for OAuth2) + ModelResponse: A response object containing: + - data (Dict): Contains connection details including: + - id (str): Connection ID (can be used with ModelFactory.get(id)) + - redirectURL (str, optional): URL to complete OAuth authentication + (only for OAuth1/OAuth2) + + Raises: + ValueError: If the authentication schema is not supported by this integration + or if required parameters are missing from the data dictionary. + + Examples: + Using Bearer Token authentication: + >>> integration.connect( + ... AuthenticationSchema.BEARER_TOKEN, + ... data={"token": "1234567890"}, + ... name="My Connection" + ... ) + + Using OAuth2 authentication: + >>> response = integration.connect( + ... AuthenticationSchema.OAUTH2, + ... name="My Connection" + ... ) + >>> # For OAuth2, you'll need to visit the redirectURL to complete auth + >>> print(response.data.get("redirectURL")) + + Using API Key authentication: + >>> integration.connect( + ... AuthenticationSchema.API_KEY, + ... data={"api_key": "your-api-key"}, + ... name="My Connection" + ... 
) """ if self.id == "686eb9cd26480723d0634d3e": return self.run({"data": data}) @@ -138,6 +215,12 @@ def connect(self, authentication_schema: AuthenticationSchema, args: Optional[Ba def __repr__(self): + """Return a string representation of the Integration instance. + + Returns: + str: A string in the format "Integration: <name> by <supplier> (id=<id>)". + If supplier is a dictionary, uses supplier['name'], otherwise uses supplier directly. + """ try: return f"Integration: {self.name} by {self.supplier['name']} (id={self.id})" except Exception: diff --git a/aixplain/modules/model/llm_model.py b/aixplain/modules/model/llm_model.py index a1eeb95e..88f0b52b 100644 --- a/aixplain/modules/model/llm_model.py +++ b/aixplain/modules/model/llm_model.py @@ -67,20 +67,24 @@ def __init__( function_type: Optional[FunctionType] = FunctionType.AI, **additional_info, ) -> None: - """LLM Init + """Initialize a new LLM instance. Args: - id (Text): ID of the Model - name (Text): Name of the Model - description (Text, optional): description of the model. Defaults to "". - api_key (Text, optional): API key of the Model. Defaults to None. - supplier (Union[Dict, Text, Supplier, int], optional): supplier of the asset. Defaults to "aiXplain". - version (Text, optional): version of the model. Defaults to "1.0". - function (Function, optional): model AI function. Defaults to None. - is_subscribed (bool, optional): Is the user subscribed. Defaults to False. - cost (Dict, optional): model price. Defaults to None. - function_type (FunctionType, optional): type of the function. Defaults to FunctionType.AI. - **additional_info: Any additional Model info to be saved + id (Text): ID of the LLM model. + name (Text): Name of the LLM model. + description (Text, optional): Description of the model. Defaults to "". + api_key (Text, optional): API key for the model. Defaults to None. + supplier (Union[Dict, Text, Supplier, int], optional): Supplier of the model. Defaults to "aiXplain".
+ version (Text, optional): Version of the model. Defaults to "1.0". + function (Function, optional): Model's AI function. Must be Function.TEXT_GENERATION. + is_subscribed (bool, optional): Whether the user is subscribed. Defaults to False. + cost (Dict, optional): Cost of the model. Defaults to None. + temperature (float, optional): Default temperature for text generation. Defaults to 0.001. + function_type (FunctionType, optional): Type of the function. Defaults to FunctionType.AI. + **additional_info: Any additional model info to be saved. + + Raises: + AssertionError: If function is not Function.TEXT_GENERATION. """ assert function == Function.TEXT_GENERATION, "LLM only supports large language models (i.e. text generation function)" super().__init__( @@ -115,23 +119,42 @@ def run( wait_time: float = 0.5, stream: bool = False, ) -> Union[ModelResponse, ModelResponseStreamer]: - """Synchronously running a Large Language Model (LLM) model. + """Run the LLM model synchronously to generate text. + + This method runs the LLM model to generate text based on the provided input. + It supports both single-turn and conversational interactions, with options + for streaming responses. Args: - data (Union[Text, Dict]): Text to LLM or last user utterance of a conversation. - context (Optional[Text], optional): System message. Defaults to None. - prompt (Optional[Text], optional): Prompt Message which comes on the left side of the last utterance. Defaults to None. - history (Optional[List[Dict]], optional): Conversation history in OpenAI format ([{ "role": "assistant", "content": "Hello, world!"}]). Defaults to None. - temperature (Optional[float], optional): LLM temperature. Defaults to None. - max_tokens (int, optional): Maximum Generation Tokens. Defaults to 128. - top_p (float, optional): Top P. Defaults to 1.0. - name (Text, optional): ID given to a call. Defaults to "model_process". - timeout (float, optional): total polling time. Defaults to 300. 
- parameters (Dict, optional): optional parameters to the model. Defaults to None. - wait_time (float, optional): wait time in seconds between polling calls. Defaults to 0.5. - stream (bool, optional): whether the model supports streaming. Defaults to False. + data (Text): The input text or last user utterance for text generation. + context (Optional[Text], optional): System message or context for the model. + Defaults to None. + prompt (Optional[Text], optional): Prompt template or prefix to prepend to + the input. Defaults to None. + history (Optional[List[Dict]], optional): Conversation history in OpenAI format + (e.g., [{"role": "assistant", "content": "Hello!"}, ...]). Defaults to None. + temperature (Optional[float], optional): Sampling temperature for text generation. + Higher values make output more random. If None, uses the model's default. + Defaults to None. + max_tokens (int, optional): Maximum number of tokens to generate. + Defaults to 128. + top_p (float, optional): Nucleus sampling parameter. Only tokens with cumulative + probability < top_p are considered. Defaults to 1.0. + name (Text, optional): Identifier for this model run. Useful for logging. + Defaults to "model_process". + timeout (float, optional): Maximum time in seconds to wait for completion. + Defaults to 300. + parameters (Optional[Dict], optional): Additional model-specific parameters. + Defaults to None. + wait_time (float, optional): Time in seconds between polling attempts. + Defaults to 0.5. + stream (bool, optional): Whether to stream the model's output tokens. + Defaults to False. + Returns: - Union[ModelResponse, ModelStreamer]: parsed output from model + Union[ModelResponse, ModelResponseStreamer]: If stream=False, returns a ModelResponse + containing the complete generated text and metadata. If stream=True, returns + a ModelResponseStreamer that yields tokens as they're generated. 
""" start = time.time() parameters = parameters or {} @@ -191,21 +214,41 @@ def run_async( name: Text = "model_process", parameters: Optional[Dict] = None, ) -> ModelResponse: - """Runs asynchronously a model call. + """Run the LLM model asynchronously to generate text. + + This method starts an asynchronous text generation task and returns immediately + with a response containing a polling URL. The actual result can be retrieved + later using the polling URL. Args: - data (Union[Text, Dict]): Text to LLM or last user utterance of a conversation. - context (Optional[Text], optional): System message. Defaults to None. - prompt (Optional[Text], optional): Prompt Message which comes on the left side of the last utterance. Defaults to None. - history (Optional[List[Dict]], optional): Conversation history in OpenAI format ([{ "role": "assistant", "content": "Hello, world!"}]). Defaults to None. - temperature (Optional[float], optional): LLM temperature. Defaults to None. - max_tokens (int, optional): Maximum Generation Tokens. Defaults to 128. - top_p (float, optional): Top P. Defaults to 1.0. - name (Text, optional): ID given to a call. Defaults to "model_process". - parameters (Dict, optional): optional parameters to the model. Defaults to None. + data (Text): The input text or last user utterance for text generation. + context (Optional[Text], optional): System message or context for the model. + Defaults to None. + prompt (Optional[Text], optional): Prompt template or prefix to prepend to + the input. Defaults to None. + history (Optional[List[Dict]], optional): Conversation history in OpenAI format + (e.g., [{"role": "assistant", "content": "Hello!"}, ...]). Defaults to None. + temperature (Optional[float], optional): Sampling temperature for text generation. + Higher values make output more random. If None, uses the model's default. + Defaults to None. + max_tokens (int, optional): Maximum number of tokens to generate. + Defaults to 128. 
+ top_p (float, optional): Nucleus sampling parameter. Only tokens with cumulative + probability < top_p are considered. Defaults to 1.0. + name (Text, optional): Identifier for this model run. Useful for logging. + Defaults to "model_process". + parameters (Optional[Dict], optional): Additional model-specific parameters. + Defaults to None. Returns: - dict: polling URL in response + ModelResponse: A response object containing: + - status (ResponseStatus): Status of the request (e.g., IN_PROGRESS) + - url (str): URL to poll for the final result + - data (str): Empty string (result not available yet) + - details (Dict): Additional response details + - completed (bool): False (task not completed yet) + - error_message (str): Error message if request failed + Other fields may be present depending on the response. """ url = f"{self.url}/{self.id}" logging.debug(f"Model Run Async: Start service for {name} - {url}") diff --git a/aixplain/modules/model/mcp_connection.py b/aixplain/modules/model/mcp_connection.py index 19155694..eac605fd 100644 --- a/aixplain/modules/model/mcp_connection.py +++ b/aixplain/modules/model/mcp_connection.py @@ -5,18 +5,45 @@ class ConnectAction: + """A class representing an action that can be performed by an MCP connection. + + This class defines the structure of a connection action with its name, description, + code, and input parameters. + + Attributes: + name (Text): The display name of the action. + description (Text): A detailed description of what the action does. + code (Optional[Text]): The internal code/identifier for the action. Defaults to None. + inputs (Optional[Dict]): The input parameters required by the action. Defaults to None. + """ + name: Text description: Text code: Optional[Text] = None inputs: Optional[Dict] = None def __init__(self, name: Text, description: Text, code: Optional[Text] = None, inputs: Optional[Dict] = None): + """Initialize a new ConnectAction instance. 
+ + Args: + name (Text): The display name of the action. + description (Text): A detailed description of what the action does. + code (Optional[Text], optional): The internal code/identifier for the action. + Defaults to None. + inputs (Optional[Dict], optional): The input parameters required by the action. + Defaults to None. + """ self.name = name self.description = description self.code = code self.inputs = inputs def __repr__(self): + """Return a string representation of the ConnectAction instance. + + Returns: + str: A string in the format "Action(code=<code>, name=<name>)". + """ return f"Action(code={self.code}, name={self.name})" @@ -38,20 +65,25 @@ def __init__( function_type: Optional[FunctionType] = FunctionType.CONNECTION, **additional_info, ) -> None: - """Connection Init + """Initialize a new MCPConnection instance. Args: - id (Text): ID of the Model - name (Text): Name of the Model - description (Text, optional): description of the model. Defaults to "". - api_key (Text, optional): API key of the Model. Defaults to None. - supplier (Union[Dict, Text, Supplier, int], optional): supplier of the asset. Defaults to "aiXplain". - version (Text, optional): version of the model. Defaults to "1.0". - function (Function, optional): model AI function. Defaults to None. - is_subscribed (bool, optional): Is the user subscribed. Defaults to False. - cost (Dict, optional): model price. Defaults to None. - scope (Text, optional): action scope of the connection. Defaults to None. - **additional_info: Any additional Model info to be saved + id (Text): ID of the MCP Connection. + name (Text): Name of the MCP Connection. + description (Text, optional): Description of the Connection. Defaults to "". + api_key (Text, optional): API key for the Connection. Defaults to None. + supplier (Union[Dict, Text, Supplier, int], optional): Supplier of the Connection. + Defaults to "aiXplain". + version (Text, optional): Version of the Connection. Defaults to "1.0". 
+ function (Function, optional): Function of the Connection. Defaults to None. + is_subscribed (bool, optional): Whether the user is subscribed. Defaults to False. + cost (Dict, optional): Cost of the Connection. Defaults to None. + function_type (FunctionType, optional): Type of the function. Must be + FunctionType.MCP_CONNECTION. Defaults to FunctionType.CONNECTION. + **additional_info: Any additional Connection info to be saved. + + Raises: + AssertionError: If function_type is not FunctionType.MCP_CONNECTION. """ assert function_type == FunctionType.MCP_CONNECTION, "Connection only supports mcp connection function" super().__init__( @@ -69,6 +101,18 @@ def __init__( ) def _get_actions(self): + """Retrieve the list of available tools for this MCP connection. + + This internal method fetches the list of tools that can be used with this + connection by calling the LIST_TOOLS action. + + Returns: + List[ConnectAction]: A list of available tools, each represented as a + ConnectAction object. + + Raises: + Exception: If the tools cannot be retrieved from the server. + """ response = Model.run(self, {"action": "LIST_TOOLS", "data": " "}) if response.status == ResponseStatus.SUCCESS: return [ @@ -80,6 +124,23 @@ def _get_actions(self): ) def get_action_inputs(self, action: Union[ConnectAction, Text]): + """Retrieve the input parameters required for a specific tool. + + This method fetches the input parameters that are required to use a specific + tool. If the action object already has its inputs cached, returns those + instead of making a server request. + + Args: + action (Union[ConnectAction, Text]): The tool to get inputs for, either as + a ConnectAction object or as a string code. + + Returns: + Dict: A dictionary mapping input parameter codes to their specifications. + + Raises: + Exception: If the inputs cannot be retrieved from the server or if the + response cannot be parsed. 
+ """ if action.inputs: return action.inputs diff --git a/aixplain/modules/model/model_response_streamer.py b/aixplain/modules/model/model_response_streamer.py index f1fef5f8..84f5ccca 100644 --- a/aixplain/modules/model/model_response_streamer.py +++ b/aixplain/modules/model/model_response_streamer.py @@ -5,13 +5,27 @@ class ModelResponseStreamer: + """A class representing a streamer for model responses. + + This class provides an iterator interface for streaming model responses. + It handles the conversion of JSON-like strings into ModelResponse objects + and manages the response status. + """ + def __init__(self, iterator: Iterator): + """Initialize a new ModelResponseStreamer instance. + + Args: + iterator (Iterator): An iterator that yields JSON-like strings. + """ self.iterator = iterator self.status = ResponseStatus.IN_PROGRESS def __next__(self): - """ - Returns the next chunk of the response. + """Return the next chunk of the response. + + Returns: + ModelResponse: A ModelResponse object containing the next chunk of the response. """ line = next(self.iterator).replace("data: ", "") try: @@ -25,4 +39,9 @@ def __next__(self): return ModelResponse(status=self.status, data=content) def __iter__(self): + """Return the iterator for the ModelResponseStreamer. + + Returns: + Iterator: The iterator for the ModelResponseStreamer. + """ return self diff --git a/aixplain/modules/model/record.py b/aixplain/modules/model/record.py index 4cc958d8..30f5c8f7 100644 --- a/aixplain/modules/model/record.py +++ b/aixplain/modules/model/record.py @@ -4,6 +4,11 @@ class Record: + """A class representing a record in an index. + + This class defines the structure of a record with its value, type, ID, URI, + and attributes. + """ def __init__( self, value: str = "", @@ -12,6 +17,15 @@ def __init__( uri: str = "", attributes: dict = {}, ): + """Initialize a new Record instance. + + Args: + value (str): The value of the record. + value_type (DataType): The type of the value. 
+ id (Optional[str]): The ID of the record. Defaults to a random UUID. + uri (str): The URI of the record. + attributes (dict): The attributes of the record. + """ self.value = value self.value_type = value_type self.id = id if id is not None else str(uuid4()) @@ -19,6 +33,11 @@ def __init__( self.attributes = attributes def to_dict(self): + """Convert the record to a dictionary. + + Returns: + dict: A dictionary containing the record's value, type, ID, URI, and attributes. + """ return { "data": self.value, "dataType": str(self.value_type), @@ -28,7 +47,11 @@ def to_dict(self): } def validate(self): - """Validate the record""" + """Validate the record. + + Raises: + AssertionError: If the value type is invalid or if the URI is required for image records. + """ from aixplain.factories import FileFactory from aixplain.modules.model.utils import is_supported_image_type diff --git a/aixplain/modules/model/response.py b/aixplain/modules/model/response.py index a0cf08f8..be37b7f1 100644 --- a/aixplain/modules/model/response.py +++ b/aixplain/modules/model/response.py @@ -4,7 +4,12 @@ class ModelResponse: - """ModelResponse class to store the response of the model run.""" + """ModelResponse class to store the response of the model run. + + This class provides a structured way to store and manage the response from model runs. + It includes fields for status, data, details, completion status, error messages, + usage information, and additional metadata. + """ def __init__( self, @@ -20,6 +25,21 @@ def __init__( error_code: Optional[ErrorCode] = None, **kwargs, ): + """Initialize a new ModelResponse instance. + + Args: + status (ResponseStatus): The status of the response. + data (Text): The data returned by the model. + details (Optional[Union[Dict, List]]): Additional details about the response. + completed (bool): Whether the response is complete. + error_message (Text): The error message if the response is not successful. 
+ used_credits (float): The amount of credits used for the response. + run_time (float): The time taken to generate the response. + usage (Optional[Dict]): Usage information about the response. + url (Optional[Text]): The URL of the response. + error_code (Optional[ErrorCode]): The error code if the response is not successful. + **kwargs: Additional keyword arguments. + """ self.status = status self.data = data self.details = details @@ -37,6 +57,17 @@ def __init__( self.additional_fields = kwargs def __getitem__(self, key: Text) -> Any: + """Get an item from the ModelResponse. + + Args: + key (Text): The key to get the value for. + + Returns: + Any: The value associated with the key. + + Raises: + KeyError: If the key is not found in the ModelResponse. + """ if key in self.__dict__: return self.__dict__[key] elif self.additional_fields and key in self.additional_fields: @@ -48,12 +79,30 @@ def __getitem__(self, key: Text) -> Any: raise KeyError(f"Key '{key}' not found in ModelResponse.") def get(self, key: Text, default: Optional[Any] = None) -> Any: + """Get an item from the ModelResponse with a default value. + + Args: + key (Text): The key to get the value for. + default (Optional[Any]): The default value to return if the key is not found. + + Returns: + Any: The value associated with the key or the default value if the key is not found. + """ try: return self[key] except KeyError: return default def __setitem__(self, key: Text, value: Any) -> None: + """Set an item in the ModelResponse. + + Args: + key (Text): The key to set the value for. + value (Any): The value to set. + + Raises: + KeyError: If the key is not found in the ModelResponse. 
+ """ if key in self.__dict__: self.__dict__[key] = value elif self.additional_fields and key in self.additional_fields: @@ -66,6 +115,11 @@ def __setitem__(self, key: Text, value: Any) -> None: raise KeyError(f"Key '{key}' not found in ModelResponse.") def __repr__(self) -> str: + """Return a string representation of the ModelResponse. + + Returns: + str: A string representation of the ModelResponse. + """ fields = [] if self.status: fields.append(f"status={self.status}") @@ -92,6 +146,14 @@ def __repr__(self) -> str: return f"ModelResponse({', '.join(fields)})" def __contains__(self, key: Text) -> bool: + """Check if a key is in the ModelResponse. + + Args: + key (Text): The key to check for. + + Returns: + bool: True if the key is in the ModelResponse, False otherwise. + """ try: self[key] return True @@ -99,6 +161,11 @@ def __contains__(self, key: Text) -> bool: return False def to_dict(self) -> Dict[Text, Any]: + """Convert the ModelResponse to a dictionary. + + Returns: + Dict[Text, Any]: A dictionary representation of the ModelResponse. + """ base_dict = { "status": self.status, "data": self.data, diff --git a/aixplain/modules/model/utility_model.py b/aixplain/modules/model/utility_model.py index 7ad49673..9b54cdad 100644 --- a/aixplain/modules/model/utility_model.py +++ b/aixplain/modules/model/utility_model.py @@ -34,6 +34,17 @@ class BaseUtilityModelParams(BaseModel): + """Base model for utility model parameters. + + This class defines the basic parameters required to create or update a utility model. + + Attributes: + name (Text): The name of the utility model. + code (Union[Text, Callable]): The implementation code, either as a string or + a callable function. + description (Optional[Text]): A description of what the utility model does. + Defaults to None. 
+ """ name: Text code: Union[Text, Callable] description: Optional[Text] = None @@ -41,15 +52,38 @@ class BaseUtilityModelParams(BaseModel): @dataclass class UtilityModelInput: + """A class representing an input parameter for a utility model. + + This class defines the structure and validation rules for input parameters + that can be used with utility models. + + Attributes: + name (Text): The name of the input parameter. + description (Text): A description of what this input parameter represents. + type (DataType): The data type of the input parameter. Must be one of: + TEXT, BOOLEAN, or NUMBER. Defaults to DataType.TEXT. + """ + name: Text description: Text type: DataType = DataType.TEXT def validate(self): + """Validate that the input parameter has a supported data type. + + Raises: + ValueError: If the type is not one of: TEXT, BOOLEAN, or NUMBER. + """ if self.type not in [DataType.TEXT, DataType.BOOLEAN, DataType.NUMBER]: raise ValueError("Utility Model Input type must be TEXT, BOOLEAN or NUMBER") def to_dict(self): + """Convert the input parameter to a dictionary representation. + + Returns: + dict: A dictionary containing the input parameter's name, description, + and type (as a string value). + """ return {"name": self.name, "description": self.description, "type": self.type.value} @@ -137,23 +171,45 @@ def __init__( function_type: Optional[FunctionType] = FunctionType.UTILITY, **additional_info, ) -> None: - """Utility Model Init + """Initialize a new UtilityModel instance. Args: - id (Text): ID of the Model - name (Text): Name of the Model - code (Union[Text, Callable]): code of the model. - description (Text): description of the model. Defaults to "". - inputs (List[UtilityModelInput]): inputs of the model. Defaults to []. - output_examples (Text): output examples. Defaults to "". - api_key (Text, optional): API key of the Model. Defaults to None. - supplier (Union[Dict, Text, Supplier, int], optional): supplier of the asset. Defaults to "aiXplain". 
- version (Text, optional): version of the model. Defaults to "1.0". - function (Function, optional): model AI function. Defaults to None. - is_subscribed (bool, optional): Is the user subscribed. Defaults to False. - cost (Dict, optional): model price. Defaults to None. - function_type (FunctionType, optional): type of the function. Defaults to FunctionType.UTILITY. - **additional_info: Any additional Model info to be saved + id (Text): ID of the utility model. + name (Optional[Text], optional): Name of the utility model. If not provided, + will be extracted from the code if decorated. Defaults to None. + code (Union[Text, Callable], optional): Implementation code, either as a string + or a callable function. Defaults to None. + description (Optional[Text], optional): Description of what the model does. + If not provided, will be extracted from the code if decorated. + Defaults to None. + inputs (List[UtilityModelInput], optional): List of input parameters the + model accepts. If not provided, will be extracted from the code if + decorated. Defaults to []. + output_examples (Text, optional): Examples of the model's expected outputs. + Defaults to "". + api_key (Optional[Text], optional): API key for accessing the model. + Defaults to None. + supplier (Union[Dict, Text, Supplier, int], optional): Supplier of the model. + Defaults to "aiXplain". + version (Optional[Text], optional): Version of the model. Defaults to None. + function (Optional[Function], optional): Function type. Must be + Function.UTILITIES. Defaults to None. + is_subscribed (bool, optional): Whether the user is subscribed. + Defaults to False. + cost (Optional[Dict], optional): Cost information for the model. + Defaults to None. + status (AssetStatus, optional): Current status of the model. + Defaults to AssetStatus.DRAFT. + function_type (Optional[FunctionType], optional): Type of the function. + Defaults to FunctionType.UTILITY. + **additional_info: Any additional model info to be saved. 
+ + Raises: + AssertionError: If function is not Function.UTILITIES. + + Note: + Non-deployed utility models (status=DRAFT) will expire after 24 hours. + Use the .deploy() method to make the model permanent. """ assert function == Function.UTILITIES, "Utility Model only supports 'utilities' function" super().__init__( @@ -190,7 +246,12 @@ def __init__( ) def validate(self): - """Validate the Utility Model.""" + """Validate the Utility Model. + + This method checks if the utility model exists in the backend and if the code is a string with s3://. + If not, it parses the code and updates the description and inputs and does the validation. + If yes, it just does the validation on the description and inputs. + """ description = None name = None # check if the model exists and if the code is strring with s3:// @@ -213,6 +274,18 @@ def validate(self): assert self.code and self.code.strip() != "", "Code is required" def _model_exists(self): + """Check if the utility model exists in the backend. + + This internal method verifies whether a model with the current ID exists + by making a GET request to the backend API. + + Returns: + bool: True if the model exists and is accessible, False if the ID is + empty or None. + + Raises: + Exception: If the API request fails or returns a non-200 status code. + """ if self.id is None or self.id == "": return False url = urljoin(self.backend_url, f"sdk/models/{self.id}") @@ -224,6 +297,21 @@ def _model_exists(self): return True def to_dict(self): + """Convert the utility model to a dictionary representation. + + This method creates a dictionary containing all the essential information + about the utility model, suitable for API requests or serialization. 
+ + Returns: + dict: A dictionary containing: + - name (str): The model's name + - description (str): The model's description + - inputs (List[dict]): List of input parameters as dictionaries + - code (Union[str, Callable]): The model's implementation code + - function (str): The function type as a string value + - outputDescription (str): Examples of expected outputs + - status (str): Current status as a string value + """ return { "name": self.name, "description": self.description, @@ -235,7 +323,13 @@ def to_dict(self): } def update(self): - """Update the Utility Model.""" + """Update the Utility Model. + + This method validates the utility model and updates it in the backend. + + Raises: + Exception: If the update fails. + """ import warnings import inspect @@ -266,11 +360,17 @@ def update(self): raise Exception(f"{message}") def save(self): - """Save the Utility Model.""" + """Save the Utility Model. + + This method updates the utility model in the backend. + """ self.update() def delete(self): - """Delete the Utility Model.""" + """Delete the Utility Model. + + This method deletes the utility model from the backend. + """ url = urljoin(self.backend_url, f"sdk/utilities/{self.id}") headers = {"x-api-key": f"{self.api_key}", "Content-Type": "application/json"} try: @@ -288,6 +388,13 @@ def delete(self): raise Exception(f"{message}") def __repr__(self): + """Return a string representation of the UtilityModel instance. + + Returns: + str: A string in the format "UtilityModel: <name> by <supplier> (id=<id>)". + If supplier is a dictionary, uses supplier['name'], otherwise uses + supplier directly. 
+ """ try: return f"UtilityModel: {self.name} by {self.supplier['name']} (id={self.id})" except Exception: diff --git a/aixplain/modules/model/utils.py b/aixplain/modules/model/utils.py index cc68a347..4c1b6239 100644 --- a/aixplain/modules/model/utils.py +++ b/aixplain/modules/model/utils.py @@ -8,6 +8,30 @@ def build_payload(data: Union[Text, Dict], parameters: Optional[Dict] = None, stream: Optional[bool] = None): + """Build a JSON payload for API requests. + + This function constructs a JSON payload by combining input data with optional + parameters and streaming configuration. It handles various input formats and + ensures proper JSON serialization. + + Args: + data (Union[Text, Dict]): The primary data to include in the payload. + Can be a string (which may be JSON) or a dictionary. + parameters (Optional[Dict], optional): Additional parameters to include + in the payload. Defaults to None. + stream (Optional[bool], optional): Whether to enable streaming for this + request. If provided, adds streaming configuration to parameters. + Defaults to None. + + Returns: + str: A JSON string containing the complete payload with all parameters + and data properly formatted. + + Note: + - If data is a string that can be parsed as JSON, it will be. + - If data is a number (after JSON parsing), it will be converted to string. + - The function ensures the result is a valid JSON string. + """ from aixplain.factories import FileFactory if parameters is None: @@ -37,6 +61,29 @@ def build_payload(data: Union[Text, Dict], parameters: Optional[Dict] = None, st def call_run_endpoint(url: Text, api_key: Text, payload: Dict) -> Dict: + """Call a model execution endpoint and handle the response. + + This function makes a POST request to a model execution endpoint, handles + various response scenarios, and provides appropriate error handling. + + Args: + url (Text): The endpoint URL to call. + api_key (Text): API key for authentication. + payload (Dict): The request payload to send. 
+ + Returns: + Dict: A response dictionary containing: + - status (str): "IN_PROGRESS", "SUCCESS", or "FAILED" + - completed (bool): Whether the request is complete + - url (str, optional): Polling URL for async requests + - data (Any, optional): Response data if available + - error_message (str, optional): Error message if failed + + Note: + - For async operations, returns a polling URL in the 'url' field + - For failures, includes an error message and sets status to "FAILED" + - Handles both API errors and request exceptions + """ headers = {"x-api-key": api_key, "Content-Type": "application/json"} resp = "unspecified error" @@ -78,6 +125,37 @@ def call_run_endpoint(url: Text, api_key: Text, payload: Dict) -> Dict: def parse_code(code: Union[Text, Callable]) -> Tuple[Text, List, Text, Text]: + """Parse and process code for utility model creation. + + This function takes code input in various forms (callable, file path, URL, or + string) and processes it for use in a utility model. It extracts metadata, + validates the code structure, and prepares it for execution. + + Args: + code (Union[Text, Callable]): The code to parse. 
Can be: + - A callable function + - A file path (string) + - A URL (string) + - Raw code (string) + + Returns: + Tuple[Text, List, Text, Text]: A tuple containing: + - code (Text): The processed code, uploaded to storage + - inputs (List[UtilityModelInput]): List of extracted input parameters + - description (Text): Function description from docstring + - name (Text): Function name + + Raises: + Exception: If the code doesn't have a main function + AssertionError: If input types are not properly specified + Exception: If an input type is not supported (must be int, float, bool, or str) + + Note: + - The function requires a 'main' function in the code + - Input parameters must have type annotations + - Supported input types are: int, float, bool, str + - The code is uploaded to temporary storage for later use + """ import inspect import os import re @@ -157,7 +235,45 @@ def parse_code(code: Union[Text, Callable]) -> Tuple[Text, List, Text, Text]: return code, inputs, description, name -def parse_code_decorated(code: Union[Text, Callable]) -> Tuple[Text, List, Text]: +def parse_code_decorated(code: Union[Text, Callable]) -> Tuple[Text, List, Text, Text]: + """Parse and process code that may be decorated with @utility_tool. + + This function handles code that may be decorated with the @utility_tool + decorator, extracting metadata from either the decorator or the code itself. + It supports various input formats and provides robust parameter extraction. + + Args: + code (Union[Text, Callable]): The code to parse. 
Can be: + - A decorated callable function + - A non-decorated callable function + - A file path (string) + - A URL (string) + - Raw code (string) + + Returns: + Tuple[Text, List, Text, Text]: A tuple containing: + - code (Text): The processed code, uploaded to storage + - inputs (List[UtilityModelInput]): List of extracted input parameters + - description (Text): Function description from decorator or docstring + - name (Text): Function name from decorator or code + + Raises: + TypeError: If code is a class or class instance + AssertionError: If input types are not properly specified + Exception: In various cases: + - If code doesn't have a function definition + - If code has invalid @utility_tool decorator + - If input type is not supported + - If code parsing fails + + Note: + - Handles both decorated and non-decorated code + - For decorated code, extracts metadata from decorator + - For non-decorated code, falls back to code parsing + - Renames the function to 'main' for backend compatibility + - Supports TEXT, BOOLEAN, and NUMBER input types + - Uploads processed code to temporary storage + """ import inspect import os import re @@ -344,4 +460,23 @@ def parse_code_decorated(code: Union[Text, Callable]) -> Tuple[Text, List, Text] def is_supported_image_type(value: str) -> bool: + """Check if a file path or URL points to a supported image format. + + This function checks if the provided string ends with a supported image + file extension. The check is case-insensitive. + + Args: + value (str): The file path or URL to check. + + Returns: + bool: True if the file has a supported image extension, False otherwise. 
+ + Note: + Supported image formats are: + - JPEG (.jpg, .jpeg) + - PNG (.png) + - GIF (.gif) + - BMP (.bmp) + - WebP (.webp) + """ return any(value.lower().endswith(ext) for ext in [".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp"]) diff --git a/aixplain/modules/pipeline/asset.py b/aixplain/modules/pipeline/asset.py index 3f585315..7618b955 100644 --- a/aixplain/modules/pipeline/asset.py +++ b/aixplain/modules/pipeline/asset.py @@ -196,6 +196,45 @@ def run( response_version: Text = "v2", **kwargs, ) -> Union[Dict, PipelineResponse]: + """Run the pipeline synchronously and wait for results. + + This method executes the pipeline with the provided input data and waits + for completion. It handles both direct data input and data assets, with + support for polling and timeout. + + Args: + data (Union[Text, Dict]): The input data for the pipeline. Can be: + - A string (file path, URL, or raw data) + - A dictionary mapping node labels to input data + data_asset (Optional[Union[Text, Dict]], optional): Data asset(s) to + process. Can be a single asset ID or a dict mapping node labels + to asset IDs. Defaults to None. + name (Text, optional): Identifier for this pipeline run. Used for + logging. Defaults to "pipeline_process". + timeout (float, optional): Maximum time in seconds to wait for + completion. Defaults to 20000.0. + wait_time (float, optional): Initial time in seconds between polling + attempts. May increase over time. Defaults to 1.0. + version (Optional[Text], optional): Specific pipeline version to run. + Defaults to None. + response_version (Text, optional): Response format version ("v1" or + "v2"). Defaults to "v2". + **kwargs: Additional keyword arguments passed to the pipeline. 
+ + Returns: + Union[Dict, PipelineResponse]: If response_version is: + - "v1": Dictionary with status, error (if any), and elapsed time + - "v2": PipelineResponse object with structured response data + + Raises: + Exception: If the pipeline execution fails, times out, or encounters + errors during polling. + + Note: + - The method starts with run_async and then polls for completion + - wait_time may increase up to 60 seconds between polling attempts + - For v2 responses, use PipelineResponse methods to access results + """ start = time.time() try: response = self.run_async(data, data_asset=data_asset, name=name, version=version, **kwargs) @@ -256,14 +295,38 @@ def __prepare_payload( data: Union[Text, Dict], data_asset: Optional[Union[Text, Dict]] = None, ) -> Dict: - """Prepare pipeline execution payload, validating the input data + """Prepare and validate the pipeline execution payload. + + This internal method processes input data and data assets into a format + suitable for pipeline execution. It handles various input formats and + performs validation of data assets. Args: - data (Union[Text, Dict]): input data - data_asset (Optional[Union[Text, Dict]], optional): input data asset. Defaults to None. + data (Union[Text, Dict]): The input data to process. Can be: + - A string (file path, URL, or raw data) + - A dictionary mapping node labels to input data + data_asset (Optional[Union[Text, Dict]], optional): Data asset(s) to + process. Can be: + - A single asset ID (string) + - A dictionary mapping node labels to asset IDs + Defaults to None. 
Returns: - Dict: pipeline execution payload + Dict: A formatted payload containing: + - data: List of node inputs with values and IDs + - dataAsset (if applicable): Asset references with corpus/dataset IDs + + Raises: + Exception: In various cases: + - If data and data_asset format mismatch + - If specified data asset doesn't exist + - If specified data isn't found in the data asset + - If data format is invalid + + Note: + - For data assets, validates both asset existence and data presence + - Handles both single-input and multi-input scenarios + - Automatically uploads local files to temporary storage """ from aixplain.factories import ( CorpusFactory, @@ -509,7 +572,23 @@ def update( raise Exception(e) def delete(self) -> None: - """Delete Dataset service""" + """Delete this pipeline from the platform. + + This method permanently removes the pipeline from the aiXplain platform. + The operation cannot be undone. + + Raises: + Exception: If deletion fails, which can happen if: + - The pipeline doesn't exist + - The user doesn't have permission to delete it + - The API request fails + - The server returns a non-200 status code + + Note: + - This operation is permanent and cannot be undone + - Only the pipeline owner can delete it + - Uses the team API key for authentication + """ try: url = urljoin(config.BACKEND_URL, f"sdk/pipelines/{self.id}") headers = { @@ -601,4 +680,9 @@ def deploy(self, api_key: Optional[Text] = None) -> None: raise Exception(f"Error deploying because of backend error: {e}") from e def __repr__(self): + """Return a string representation of the Pipeline instance. + + Returns: + str: A string in the format "Pipeline: (id=)". 
+ """ return f"Pipeline: {self.name} (id={self.id})" diff --git a/aixplain/modules/pipeline/default.py b/aixplain/modules/pipeline/default.py index 60d879f4..e1437e4d 100644 --- a/aixplain/modules/pipeline/default.py +++ b/aixplain/modules/pipeline/default.py @@ -4,15 +4,27 @@ class DefaultPipeline(PipelineAsset, DesignerPipeline): + """ + DefaultPipeline is a subclass of PipelineAsset and DesignerPipeline. + """ def __init__(self, *args, **kwargs): + """ + Initialize the DefaultPipeline. + """ PipelineAsset.__init__(self, *args, **kwargs) DesignerPipeline.__init__(self) def save(self, *args, **kwargs): + """ + Save the DefaultPipeline. + """ self.auto_infer() self.validate() super().save(*args, **kwargs) def to_dict(self) -> dict: + """ + Convert the DefaultPipeline to a dictionary. + """ return self.serialize() \ No newline at end of file diff --git a/aixplain/modules/pipeline/response.py b/aixplain/modules/pipeline/response.py index deda345d..52319f76 100644 --- a/aixplain/modules/pipeline/response.py +++ b/aixplain/modules/pipeline/response.py @@ -5,6 +5,20 @@ @dataclass class PipelineResponse: + """A response object for pipeline operations. + + This class encapsulates the response from pipeline operations, including + status, error information, timing data, and any additional fields. + + Attributes: + status (ResponseStatus): The status of the pipeline operation. + error (Optional[Dict[str, Any]]): Error details if operation failed. + elapsed_time (Optional[float]): Time taken to complete the operation. + data (Optional[Text]): The main response data. + url (Optional[Text]): URL for polling or accessing results. + additional_fields (Dict[str, Any]): Any extra fields provided. + """ + def __init__( self, status: ResponseStatus, @@ -14,6 +28,20 @@ def __init__( url: Optional[Text] = "", **kwargs, ): + """Initialize a new PipelineResponse instance. + + Args: + status (ResponseStatus): The status of the pipeline operation. 
+ error (Optional[Dict[str, Any]], optional): Error details if operation + failed. Defaults to None. + elapsed_time (Optional[float], optional): Time taken to complete the + operation in seconds. Defaults to 0.0. + data (Optional[Text], optional): The main response data. + Defaults to None. + url (Optional[Text], optional): URL for polling or accessing results. + Defaults to "". + **kwargs: Additional fields to store in the response. + """ self.status = status self.error = error self.elapsed_time = elapsed_time @@ -22,18 +50,63 @@ def __init__( self.url = url def __getattr__(self, key: str) -> Any: + """Get an attribute from additional_fields if it exists. + + This method is called when an attribute lookup has not found the + attribute in the usual places (i.e., it is not an instance attribute + nor found through the __mro__ chain). + + Args: + key (str): The name of the attribute to get. + + Returns: + Any: The value from additional_fields. + + Raises: + AttributeError: If the key is not found in additional_fields. + """ if self.additional_fields and key in self.additional_fields: return self.additional_fields[key] raise AttributeError() def get(self, key: str, default: Any = None) -> Any: + """Get an attribute value with a default if not found. + + Args: + key (str): The name of the attribute to get. + default (Any, optional): Value to return if key is not found. + Defaults to None. + + Returns: + Any: The attribute value or default if not found. + """ return getattr(self, key, default) def __getitem__(self, key: str) -> Any: + """Get an attribute value using dictionary-style access. + + This method enables dictionary-style access to attributes + (e.g., response["status"]). + + Args: + key (str): The name of the attribute to get. + + Returns: + Any: The attribute value. + + Raises: + AttributeError: If the key is not found. + """ return getattr(self, key) def __repr__(self) -> str: + """Return a string representation of the PipelineResponse. 
+ + Returns: + str: A string in the format "PipelineResponse(status=X, error=Y, ...)" + containing all non-empty fields. + """ fields = [] if self.status: fields.append(f"status={self.status}") @@ -48,4 +121,15 @@ def __repr__(self) -> str: return f"PipelineResponse({', '.join(fields)})" def __contains__(self, key: str) -> bool: + """Check if an attribute exists using 'in' operator. + + This method enables using the 'in' operator to check for attribute + existence (e.g., "status" in response). + + Args: + key (str): The name of the attribute to check. + + Returns: + bool: True if the attribute exists, False otherwise. + """ return hasattr(self, key) diff --git a/aixplain/modules/team_agent/__init__.py b/aixplain/modules/team_agent/__init__.py index 078285d2..16da1595 100644 --- a/aixplain/modules/team_agent/__init__.py +++ b/aixplain/modules/team_agent/__init__.py @@ -49,11 +49,26 @@ class InspectorTarget(str, Enum): + """Target stages for inspector validation in the team agent pipeline. + + This enumeration defines the stages where inspectors can be applied to + validate and ensure quality of the team agent's operation. + + Attributes: + INPUT: Validates the input data before processing. + STEPS: Validates intermediate steps during processing. + OUTPUT: Validates the final output before returning. + """ INPUT = "input" STEPS = "steps" OUTPUT = "output" def __str__(self): + """Return the string value of the enum member. + + Returns: + str: The string value associated with the enum member. + """ return self._value_ @@ -336,7 +351,24 @@ def delete(self) -> None: raise Exception(f"{message}") def _serialize_agent(self, agent, idx: int) -> Dict: - """Serialize an agent for the to_dict method.""" + """Serialize an agent for the to_dict method. + + This internal method converts an agent object into a dictionary format + suitable for serialization, including its base properties and any + additional data from the agent's own to_dict method. 
+ + Args: + agent: The agent object to serialize. + idx (int): The index position of the agent in the team. + + Returns: + Dict: A dictionary containing the serialized agent data with: + - assetId: The agent's ID + - number: The agent's index position + - type: Always "AGENT" + - label: Always "AGENT" + - Additional fields from agent.to_dict() if available + """ base_dict = {"assetId": agent.id, "number": idx, "type": "AGENT", "label": "AGENT"} # Try to get additional data from agent's to_dict method @@ -359,6 +391,29 @@ def _serialize_agent(self, agent, idx: int) -> Dict: return base_dict def to_dict(self) -> Dict: + """Convert the TeamAgent instance to a dictionary representation. + + This method serializes the TeamAgent and all its components (agents, + inspectors, LLMs, etc.) into a dictionary format suitable for storage + or transmission. + + Returns: + Dict: A dictionary containing: + - id (str): The team agent's ID + - name (str): The team agent's name + - agents (List[Dict]): Serialized list of agents + - links (List): Empty list (reserved for future use) + - description (str): The team agent's description + - llmId (str): ID of the main language model + - supervisorId (str): ID of the supervisor language model + - plannerId (str): ID of the planner model (if use_mentalist) + - inspectors (List[Dict]): Serialized list of inspectors + - inspectorTargets (List[str]): List of inspector target stages + - supplier (str): The supplier code + - version (str): The version number + - status (str): The current status + - role (str): The team agent's instructions + """ if self.use_mentalist: planner_id = self.mentalist_llm.id if self.mentalist_llm else self.llm_id else: @@ -501,6 +556,28 @@ def _validate(self) -> None: agent.validate(raise_exception=True) def validate(self, raise_exception: bool = False) -> bool: + """Validate the TeamAgent configuration. 
+ + This method checks the validity of the TeamAgent's configuration, + including name format, LLM compatibility, and agent validity. + + Args: + raise_exception (bool, optional): If True, raises exceptions for + validation failures. If False, logs warnings. Defaults to False. + + Returns: + bool: True if validation succeeds, False otherwise. + + Raises: + Exception: If raise_exception is True and validation fails, with + details about the specific validation error. + + Note: + - The team agent cannot be run until all validation issues are fixed + - Name must contain only alphanumeric chars, spaces, hyphens, brackets + - LLM must be a text generation model + - All agents must pass their own validation + """ try: self._validate() self.is_valid = True @@ -515,7 +592,24 @@ def validate(self, raise_exception: bool = False) -> bool: return self.is_valid def update(self) -> None: - """Update the Team Agent.""" + """Update the TeamAgent in the backend. + + This method validates and updates the TeamAgent's configuration in the + backend system. It is deprecated in favor of the save() method. + + Raises: + Exception: If validation fails or if the update request fails. + Specific error messages will indicate: + - Validation failures with details + - HTTP errors with status codes + - General update errors requiring admin attention + + Note: + - This method is deprecated, use save() instead + - Performs validation before attempting update + - Requires valid team API key for authentication + - Returns a new TeamAgent instance if successful + """ import warnings import inspect @@ -550,4 +644,9 @@ def update(self) -> None: raise Exception(error_msg) def __repr__(self): + """Return a string representation of the TeamAgent. + + Returns: + str: A string in the format "TeamAgent: (id=)". 
+ """ return f"TeamAgent: {self.name} (id={self.id})" diff --git a/aixplain/modules/team_agent/inspector.py b/aixplain/modules/team_agent/inspector.py index ab4fb79f..8cd12cc0 100644 --- a/aixplain/modules/team_agent/inspector.py +++ b/aixplain/modules/team_agent/inspector.py @@ -36,6 +36,14 @@ class InspectorAuto(str, Enum): CORRECTNESS = "correctness" def get_name(self) -> Text: + """Get the standardized name for this inspector type. + + This method generates a consistent name for the inspector by prefixing + the enum value with "inspector_". + + Returns: + Text: The inspector name in the format "inspector_". + """ return "inspector_" + self.value @@ -65,12 +73,44 @@ class Inspector(ModelWithParams): policy: InspectorPolicy = InspectorPolicy.ADAPTIVE def __init__(self, *args, **kwargs): + """Initialize an Inspector instance. + + This method initializes an inspector with either a custom model or an + automatic configuration. If auto is specified, it uses the default + auto model ID. + + Args: + *args: Variable length argument list passed to parent class. + **kwargs: Arbitrary keyword arguments. Supported keys: + - name (Text): The inspector's name + - model_id (Text): The model ID to use + - model_params (Dict, optional): Model configuration + - auto (InspectorAuto, optional): Auto configuration type + - policy (InspectorPolicy, optional): Inspector policy + + Note: + If auto is specified in kwargs, model_id is automatically set to + AUTO_DEFAULT_MODEL_ID. + """ if kwargs.get("auto"): kwargs["model_id"] = AUTO_DEFAULT_MODEL_ID super().__init__(*args, **kwargs) @field_validator("name") def validate_name(cls, v: Text) -> Text: + """Validate the inspector name field. + + This validator ensures that the inspector's name is not empty. + + Args: + v (Text): The name value to validate. + + Returns: + Text: The validated name value. + + Raises: + ValueError: If the name is an empty string. 
+ """ if v == "": raise ValueError("name cannot be empty") return v diff --git a/aixplain/modules/wallet.py b/aixplain/modules/wallet.py index e05e2d3a..3a2956d2 100644 --- a/aixplain/modules/wallet.py +++ b/aixplain/modules/wallet.py @@ -23,12 +23,23 @@ class Wallet: + """A class representing a wallet for managing credit balances. + + This class provides functionality for managing credit balances in a wallet, + including total, reserved, and available balances. It is used to track and + manage credit resources in the aiXplain platform. + + Attributes: + total_balance (float): Total credit balance in the wallet. + reserved_balance (float): Reserved credit balance in the wallet. + available_balance (float): Available balance (total - reserved). + """ def __init__(self, total_balance: float, reserved_balance: float): - """ + """Initialize a new Wallet instance. + Args: - total_balance (float): total credit balance - reserved_balance (float): reserved credit balance - available_balance (float): available balance (total - credit) + total_balance (float): Total credit balance in the wallet. + reserved_balance (float): Reserved credit balance in the wallet. """ self.total_balance = total_balance self.reserved_balance = reserved_balance diff --git a/aixplain/processes/data_onboarding/onboard_functions.py b/aixplain/processes/data_onboarding/onboard_functions.py index 09d1b153..d599c1bf 100644 --- a/aixplain/processes/data_onboarding/onboard_functions.py +++ b/aixplain/processes/data_onboarding/onboard_functions.py @@ -38,13 +38,22 @@ def get_paths(input_paths: List[Union[str, Path]]) -> List[Path]: - """Recursively access all local paths. Check if file extensions are supported. + """Recursively collect all supported local file paths from the given input paths. + + This function traverses through the provided paths, which can be files or directories, + and collects paths to all supported files (currently only CSV files). 
It also performs + size validation to ensure files don't exceed 1GB. Args: - input_paths (List[Union[str, Path]]): list of input pahts including folders and files + input_paths (List[Union[str, Path]]): List of input paths. Can include both + individual file paths and directory paths. Returns: - List[Path]: list of local file paths + List[Path]: List of validated local file paths that are supported. + + Raises: + AssertionError: If any CSV file exceeds 1GB in size. + Warning: If a file has an unsupported extension. """ paths = [] for path in input_paths: @@ -74,16 +83,27 @@ def get_paths(input_paths: List[Union[str, Path]]) -> List[Path]: def process_data_files( data_asset_name: str, metadata: MetaData, paths: List, folder: Optional[Union[str, Path]] = None ) -> Tuple[List[File], int, int, int, int]: - """Process a list of local files, compress and upload them to pre-signed URLs in S3 + """Process data files based on their type and prepare them for upload to S3. + + This function handles different types of data files (audio, image, text, etc.) + by delegating to appropriate processing modules. It compresses the files if needed + and prepares them for upload to S3. Args: - data_asset_name (str): name of the data asset - metadata (MetaData): meta data of the asset - paths (List): list of paths to local files - folder (Union[str, Path], optional): local folder to save compressed files before upload them to s3. Defaults to data_asset_name. + data_asset_name (str): Name of the data asset being processed. + metadata (MetaData): Metadata object containing type and subtype information + for the data being processed. + paths (List): List of paths to local files that need processing. + folder (Optional[Union[str, Path]], optional): Local folder to save processed + files before uploading to S3. If None, uses data_asset_name. Defaults to None. 
Returns: - Tuple[List[File], int, int, int]: list of s3 links; data, start and end columns index; and number of rows + Tuple[List[File], int, int, int, int]: A tuple containing: + - List[File]: List of processed file objects ready for S3 upload + - int: Index of the data column + - int: Index of the start column (for intervals) + - int: Index of the end column (for intervals) + - int: Total number of rows processed """ if folder is None: folder = Path(data_asset_name) @@ -107,13 +127,26 @@ def process_data_files( def build_payload_data(data: Data) -> Dict: - """Create data payload to call coreengine on Corpus/Dataset onboard + """Build a payload dictionary for data onboarding to the core engine. + + This function creates a standardized payload structure for onboarding data + to the core engine. It includes data properties, file information, and metadata + such as languages and column mappings. Args: - data (Data): data object + data (Data): Data object containing information about the data to be onboarded, + including name, type, files, and language information. Returns: - Dict: payload + Dict: A dictionary containing the formatted payload with the following key fields: + - name: Name of the data + - dataColumn: Column identifier for the data + - dataType: Type of the data + - dataSubtype: Subtype of the data + - batches: List of file information with paths and order + - tags: List of descriptive tags + - metaData: Additional metadata including languages + Additional fields may be added for interval data (start/end columns). """ data_json = { "name": data.name, @@ -139,15 +172,31 @@ def build_payload_data(data: Data) -> Dict: def build_payload_corpus(corpus: Corpus, ref_data: List[Text], error_handler: ErrorHandler) -> Dict: - """Create corpus payload to call coreengine on the onboard process + """Build a payload dictionary for corpus onboarding to the core engine. 
+ + This function creates a standardized payload structure for onboarding a corpus, + including all its associated data, metadata, and configuration settings. Args: - corpus (Corpus): corpus object - ref_data (List[Text]): list of referred data - error_handler (ErrorHandler): how to handle failed rows + corpus (Corpus): Corpus object containing the data collection to be onboarded, + including name, description, functions, and associated data. + ref_data (List[Text]): List of referenced data IDs that this corpus depends on + or is related to. + error_handler (ErrorHandler): Configuration for how to handle rows that fail + during the onboarding process. Returns: - Dict: payload + Dict: A dictionary containing the formatted payload with the following key fields: + - name: Name of the corpus + - description: Description of the corpus + - suggestedFunctions: List of suggested AI functions + - onboardingErrorsPolicy: Error handling policy + - tags: List of descriptive tags + - pricing: Pricing configuration + - privacy: Privacy settings + - license: License information + - refData: Referenced data IDs + - data: List of data payloads for each data component """ payload = { "name": corpus.name, @@ -177,19 +226,35 @@ def build_payload_dataset( tags: List[Text], error_handler: ErrorHandler, ) -> Dict: - """Generate onboard payload to coreengine + """Build a payload dictionary for dataset onboarding to the core engine. + + This function creates a comprehensive payload structure for onboarding a dataset, + including all its components: input data, output data, hypotheses, and metadata. + It handles both new data and references to existing data. 
Args: - dataset (Dataset): dataset to be onboard - input_ref_data (Dict[Text, Any]): reference to existent input data - output_ref_data (Dict[Text, List[Any]]): reference to existent output data - hypotheses_ref_data (Dict[Text, Any]): reference to existent hypotheses to the target data - meta_ref_data (Dict[Text, Any]): reference to existent metadata - tags (List[Text]): description tags - error_handler (ErrorHandler): how to handle failed rows + dataset (Dataset): Dataset object to be onboarded, containing all the data + components and configuration. + input_ref_data (Dict[Text, Any]): Dictionary mapping input names to existing + data IDs in the system. + output_ref_data (Dict[Text, List[Any]]): Dictionary mapping output names to + lists of existing data IDs for multi-reference outputs. + hypotheses_ref_data (Dict[Text, Any]): Dictionary mapping hypothesis names to + existing data IDs for model outputs or predictions. + meta_ref_data (Dict[Text, Any]): Dictionary mapping metadata names to existing + metadata IDs in the system. + tags (List[Text]): List of descriptive tags for the dataset. + error_handler (ErrorHandler): Configuration for how to handle rows that fail + during the onboarding process. Returns: - Dict: onboard payload + Dict: A dictionary containing the formatted payload with the following sections: + - Basic information (name, description, function, etc.) 
+ - Configuration (error handling, privacy, license) + - Input data section with both new and referenced inputs + - Output data section with both new and referenced outputs + - Hypotheses section with both new and referenced hypotheses + - Metadata section with both new and referenced metadata """ # compute ref data flat_input_ref_data = list(input_ref_data.values()) @@ -288,15 +353,29 @@ def build_payload_dataset( def create_data_asset(payload: Dict, data_asset_type: Text = "corpus", api_key: Optional[Text] = None) -> Dict: - """Service to call onboard process in coreengine + """Create a new data asset (corpus or dataset) in the core engine. + + This function sends the onboarding request to the core engine and handles the response. + It supports both corpus and dataset creation with proper authentication. Args: - payload (Dict): onboard payload - data_asset_type (Text, optional): corpus or dataset. Defaults to "corpus". - api_key (Optional[Text]): team api key. Defaults to None. + payload (Dict): The complete payload for the data asset, containing all necessary + information for onboarding (structure depends on data_asset_type). + data_asset_type (Text, optional): Type of data asset to create. Must be either + "corpus" or "dataset". Defaults to "corpus". + api_key (Optional[Text], optional): Team API key for authentication. If None, + uses the default key from config. Defaults to None. Returns: - Dict: onboard status + Dict: A dictionary containing the onboarding status with the following fields: + - success (bool): Whether the operation was successful + - asset_id (str): ID of the created asset (if successful) + - status (str): Current status of the asset (if successful) + - error (str): Error message (if not successful) + + Note: + The function handles both successful and failed responses, providing appropriate + error messages in case of failure. 
""" if api_key is not None: team_key = api_key @@ -332,13 +411,22 @@ def create_data_asset(payload: Dict, data_asset_type: Text = "corpus", api_key: def is_data(data_id: Text) -> bool: - """Check whether reference data exists + """Check if a data object exists in the system by its ID. + + This function makes an API call to verify the existence of a data object + in the system. It's typically used to validate references before creating + new assets that depend on existing data. Args: - data_id (Text): ID of the data + data_id (Text): The ID of the data object to check. Returns: - bool: True if it exists, False otherwise + bool: True if the data exists and is accessible, False otherwise. + Returns False in case of API errors or if the data is not found. + + Note: + The function handles API errors gracefully, returning False instead + of raising exceptions. """ try: api_key = config.TEAM_API_KEY @@ -356,15 +444,31 @@ def is_data(data_id: Text) -> bool: def split_data(paths: List, split_rate: List[float], split_labels: List[Text]) -> MetaData: - """Split the data according to some split labels and rate + """Split data files into partitions based on specified rates and labels. + + This function adds a new column to CSV files to indicate the split assignment + for each row. It randomly assigns rows to splits based on the provided rates. + The function tries to find an unused column name for the split information. Args: - paths (List): path to data files - split_rate (List[Text]): split rate - split_labels (List[Text]): split labels + paths (List): List of paths to CSV files that need to be split. + split_rate (List[float]): List of proportions for each split. Should sum to 1.0. + For example, [0.8, 0.1, 0.1] for train/dev/test split. + split_labels (List[Text]): List of labels corresponding to each split rate. + For example, ["train", "dev", "test"]. 
Returns: - MetaData: metadata of the new split + MetaData: A metadata object for the new split column with: + - name: The generated column name for the split + - dtype: Set to DataType.LABEL + - dsubtype: Set to DataSubtype.SPLIT + - storage_type: Set to StorageType.TEXT + + Raises: + Exception: If no available column name is found or if file operations fail. + + Note: + The function modifies the input CSV files in place, adding the new split column. """ # get column name column_name = None diff --git a/aixplain/processes/data_onboarding/process_media_files.py b/aixplain/processes/data_onboarding/process_media_files.py index f0200165..f1076a2b 100644 --- a/aixplain/processes/data_onboarding/process_media_files.py +++ b/aixplain/processes/data_onboarding/process_media_files.py @@ -22,7 +22,19 @@ IMAGE_TEXT_MAX_SIZE = 25000000 -def compress_folder(folder_path: str): +def compress_folder(folder_path: str) -> str: + """Compress a folder into a gzipped tar archive. + + This function takes a folder and creates a compressed tar archive (.tgz) + containing all files in the folder. The archive is created in the same + directory as the input folder. + + Args: + folder_path (str): Path to the folder to be compressed. + + Returns: + str: Path to the created .tgz archive file. + """ with tarfile.open(folder_path + ".tgz", "w:gz") as tar: for name in os.listdir(folder_path): tar.add(os.path.join(folder_path, name)) @@ -30,21 +42,44 @@ def compress_folder(folder_path: str): def run(metadata: MetaData, paths: List, folder: Path, batch_size: int = 100) -> Tuple[List[File], int, int, int, int]: - """Process a list of local media files, compress and upload them to pre-signed URLs in S3 + """Process media files and prepare them for upload to S3 with batch processing. + + This function handles the processing and uploading of media files (audio, image, etc.) + to S3. 
It supports both local files and public URLs, processes them in batches, + and creates index files to track the media locations and any interval information. - Explanation: - Each media on "paths" is processed. If the media is in a public link, this link is added into an index CSV file. - If the media is in a local path, it will be copied into a local folder and its path will be added to the index CSV file. - The medias are processed in batches such that at each "batch_size" medias, the index CSV file is uploaded into a pre-signed URL in s3 and reset. - If the medias are stored locally, the local folder is compressed into a .tgz file and also uploaded into S3. + The process works as follows: + 1. For each media file in the input paths: + - If it's a public URL: Add the URL to an index CSV file + - If it's a local file: Copy to a temporary folder and add path to index + 2. After every batch_size files: + - For local files: Compress the folder into .tgz and upload to S3 + - Create and upload an index CSV file with paths and metadata + - Reset for the next batch Args: - metadata (MetaData): meta data of the asset - paths (List): list of paths to local files - folder (Path): local folder to save compressed files before upload them to s3. + metadata (MetaData): Metadata object containing information about the media type, + storage type, and column mappings. + paths (List): List of paths to CSV files containing media information. + folder (Path): Local folder path where temporary files and compressed archives + will be stored during processing. + batch_size (int, optional): Number of media files to process in each batch. + Defaults to 100. 
Returns: - Tuple[List[File], int, int, int]: list of s3 links; data, start and end columns index, and number of rows + Tuple[List[File], int, int, int, int]: A tuple containing: + - List[File]: List of File objects pointing to uploaded index files in S3 + - int: Index of the data column in the index CSV + - int: Index of the start column for intervals (-1 if not used) + - int: Index of the end column for intervals (-1 if not used) + - int: Total number of media files processed + + Raises: + Exception: If: + - Input files are not found + - Required columns are missing + - File size limits are exceeded (50MB for audio, 25MB for others) + - Invalid interval configurations are detected """ if metadata.dtype != DataType.LABEL: assert ( diff --git a/aixplain/processes/data_onboarding/process_text_files.py b/aixplain/processes/data_onboarding/process_text_files.py index e219b835..1ea6c2b9 100644 --- a/aixplain/processes/data_onboarding/process_text_files.py +++ b/aixplain/processes/data_onboarding/process_text_files.py @@ -16,14 +16,30 @@ def process_text(content: str, storage_type: StorageType) -> Text: - """Process text files + """Process text content based on its storage type and location. + + This function handles different types of text content: + - Local files: Reads the file content (with size validation) + - URLs: Marks them for non-download if they're public links + - Direct text: Uses the content as-is Args: - content (str): URL with text, local path with text or textual content - storage_type (StorageType): type of storage: URL, local path or textual content + content (str): The text content to process. Can be: + - A path to a local file + - A URL pointing to text content + - The actual text content + storage_type (StorageType): The type of storage for the content: + - StorageType.FILE for local files + - StorageType.TEXT for direct text content + - Other storage types for different handling Returns: - Text: textual content + Text: The processed text content. 
URLs may be prefixed with + "DONOTDOWNLOAD" if they should not be downloaded. + + Raises: + AssertionError: If a local text file exceeds 25MB in size. + IOError: If there are issues reading a local file. """ if storage_type == StorageType.FILE: # Check the size of file and assert a limit of 25 MB @@ -47,19 +63,44 @@ def process_text(content: str, storage_type: StorageType) -> Text: def run(metadata: MetaData, paths: List, folder: Path, batch_size: int = 1000) -> Tuple[List[File], int, int]: - """Process a list of local textual files, compress and upload them to pre-signed URLs in S3 - - Explanation: - Each text on "paths" is processed. If the text is in a public link or local file, it will be downloaded and added to an index CSV file. - The texts are processed in batches such that at each "batch_size" texts, the index CSV file is uploaded into a pre-signed URL in s3 and reset. + """Process text files in batches and upload them to S3 with index tracking. + + This function processes text files (either local or from URLs) in batches, + creating compressed CSV index files that track the text content and their + positions. The index files are then uploaded to S3. + + The process works as follows: + 1. For each input CSV file: + - Read the specified column containing text content/paths + - Process each text entry (read files, handle URLs) + - Add processed text to the current batch + 2. After every batch_size entries: + - Create a new index CSV with the processed texts + - Add row indices for tracking + - Compress and upload the index to S3 + - Start a new batch Args: - metadata (MetaData): meta data of the asset - paths (List): list of paths to local files - folder (Path): local folder to save compressed files before upload them to s3. + metadata (MetaData): Metadata object containing information about the text data, + including column names and storage type configuration. 
+ paths (List): List of paths to CSV files containing the text data or + references to text content. + folder (Path): Local folder path where the generated index files will be + temporarily stored before upload. + batch_size (int, optional): Number of text entries to process in each batch. + Defaults to 1000. Returns: - Tuple[List[File], int, int]: list of s3 links, data colum index and number of rows + Tuple[List[File], int, int]: A tuple containing: + - List[File]: List of File objects pointing to uploaded index files in S3 + - int: Index of the data column in the index CSV files + - int: Total number of text entries processed + + Raises: + Exception: If: + - Input CSV files are not found + - Required columns are missing in input files + - Text processing fails (e.g., file size limit exceeded) """ logging.debug(f'Data Asset Onboarding: Processing "{metadata.name}".') idx = 0 diff --git a/aixplain/utils/asset_cache.py b/aixplain/utils/asset_cache.py index 357b70ef..3352f18d 100644 --- a/aixplain/utils/asset_cache.py +++ b/aixplain/utils/asset_cache.py @@ -2,7 +2,7 @@ import logging import json import time -from typing import Dict, Optional +from typing import Any, Dict, Optional from dataclasses import dataclass from filelock import FileLock @@ -22,20 +22,54 @@ @dataclass class Store(Generic[T]): + """A generic data store for cached assets with expiration time. + + This class serves as a container for cached data and its expiration timestamp. + It is used internally by AssetCache to store the cached assets. + + Attributes: + data (Dict[str, T]): Dictionary mapping asset IDs to their cached instances. + expiry (int): Unix timestamp when the cached data expires. + """ data: Dict[str, T] expiry: int class AssetCache(Generic[T]): - """ - A modular caching system to handle different asset types (Models, Pipelines, Agents). + """A modular caching system for aiXplain assets with file-based persistence. 
+ + This class provides a generic caching mechanism for different types of assets + (Models, Pipelines, Agents, etc.) with automatic serialization, expiration, + and thread-safe file persistence. + + The cache uses JSON files for storage and implements file locking to ensure + thread safety. It also supports automatic cache invalidation based on + expiration time. + + Attributes: + cls (Type[T]): The class type of assets to be cached. + cache_file (str): Path to the JSON file storing the cached data. + lock_file (str): Path to the lock file for thread-safe operations. + store (Store[T]): The in-memory store containing cached data and expiry. + + Note: + The cached assets must be serializable to JSON and should implement + either a to_dict() method or have a standard __dict__ attribute. """ def __init__( self, cls: Type[T], cache_filename: Optional[str] = None, - ): + ) -> None: + """Initialize a new AssetCache instance. + + Args: + cls (Type[T]): The class type of assets to be cached. Must be + serializable to JSON. + cache_filename (Optional[str], optional): Base name for the cache file. + If None, uses lowercase class name. Defaults to None. + """ self.cls = cls if cache_filename is None: cache_filename = self.cls.__name__.lower() @@ -49,7 +83,20 @@ def __init__( if not os.path.exists(self.cache_file): self.save() - def compute_expiry(self): + def compute_expiry(self) -> int: + """Calculate the expiration timestamp for cached data. + + Uses CACHE_EXPIRY_TIME environment variable if set, otherwise falls back + to the default CACHE_DURATION. The expiry is calculated as current time + plus the duration. + + Returns: + int: Unix timestamp when the cache will expire. + + Note: + If CACHE_EXPIRY_TIME is invalid, it will be removed from environment + variables and the default duration will be used. 
+ """ try: expiry = int(os.getenv("CACHE_EXPIRY_TIME", CACHE_DURATION)) except Exception as e: @@ -63,7 +110,14 @@ def compute_expiry(self): return time.time() + int(expiry) - def invalidate(self): + def invalidate(self) -> None: + """Clear the cache and remove cache files. + + This method: + 1. Resets the in-memory store with empty data and new expiry + 2. Deletes the cache file if it exists + 3. Deletes the lock file if it exists + """ self.store = Store(data={}, expiry=self.compute_expiry()) # delete cache file and lock file if os.path.exists(self.cache_file): @@ -71,7 +125,21 @@ def invalidate(self): if os.path.exists(self.lock_file): os.remove(self.lock_file) - def load(self): + def load(self) -> None: + """Load cached data from the cache file. + + This method reads the cache file (if it exists) and loads the data into + the in-memory store. It performs the following: + 1. Checks if cache file exists, if not, invalidates cache + 2. Uses file locking to ensure thread-safe reading + 3. Deserializes JSON data and converts to appropriate asset instances + 4. Checks expiration time and invalidates if expired + 5. Handles any errors by invalidating the cache + + Note: + If any errors occur during loading (file not found, invalid JSON, + deserialization errors), the cache will be invalidated. + """ if not os.path.exists(self.cache_file): self.invalidate() return @@ -103,7 +171,21 @@ def load(self): self.invalidate() return - def save(self): + def save(self) -> None: + """Save the current cache state to the cache file. + + This method serializes the current cache state to JSON and writes it + to the cache file. It performs the following: + 1. Creates the cache directory if it doesn't exist + 2. Uses file locking to ensure thread-safe writing + 3. Serializes each cached asset to a JSON-compatible format + 4. 
Writes the serialized data and expiry time to the cache file + + Note: + If serialization fails for any asset, that asset will be skipped + and an error will be logged, but the save operation will continue + for other assets. + """ os.makedirs(CACHE_FOLDER, exist_ok=True) @@ -123,23 +205,81 @@ def save(self): json.dump(serializable_store, f, indent=4) def get(self, asset_id: str) -> Optional[T]: + """Retrieve a cached asset by its ID. + + Args: + asset_id (str): The unique identifier of the asset to retrieve. + + Returns: + Optional[T]: The cached asset instance if found, None otherwise. + """ return self.store.data.get(asset_id) - def add(self, asset: T): + def add(self, asset: T) -> None: + """Add a single asset to the cache. + + Args: + asset (T): The asset instance to cache. Must have an 'id' attribute + and be serializable to JSON. + + Note: + This method automatically saves the updated cache to disk after + adding the asset. + """ self.store.data[asset.id] = asset.__dict__ self.save() - def add_list(self, assets: List[T]): + def add_list(self, assets: List[T]) -> None: + """Add multiple assets to the cache at once. + + This method replaces all existing cached assets with the new list. + + Args: + assets (List[T]): List of asset instances to cache. Each asset must + have an 'id' attribute and be serializable to JSON. + + Note: + This method automatically saves the updated cache to disk after + adding the assets. + """ self.store.data = {asset.id: asset for asset in assets} self.save() def get_all(self) -> List[T]: + """Retrieve all cached assets. + + Returns: + List[T]: List of all cached asset instances. Returns an empty list + if the cache is empty. + """ return list(self.store.data.values()) def has_valid_cache(self) -> bool: + """Check if the cache is valid and not expired. + + Returns: + bool: True if the cache has not expired and contains data, + False otherwise. 
+ """ return self.store.expiry >= time.time() and bool(self.store.data) -def serialize(obj): +def serialize(obj: Any) -> Any: + """Convert a Python object into a JSON-serializable format. + + This function handles various Python types and converts them to formats + that can be serialized to JSON. It supports: + - Basic types (str, int, float, bool, None) + - Collections (list, tuple, set, dict) + - Objects with to_dict() method + - Objects with __dict__ attribute + - Other objects (converted to string) + + Args: + obj (Any): The Python object to serialize. + + Returns: + Any: A JSON-serializable version of the input object. + """ if isinstance(obj, (str, int, float, bool, type(None))): return obj elif isinstance(obj, (list, tuple, set)): diff --git a/aixplain/utils/cache_utils.py b/aixplain/utils/cache_utils.py index fcfe1cb6..f6c00042 100644 --- a/aixplain/utils/cache_utils.py +++ b/aixplain/utils/cache_utils.py @@ -10,11 +10,34 @@ CACHE_DURATION = 86400 -def get_cache_expiry(): +def get_cache_expiry() -> int: + """Get the cache expiration duration in seconds. + + Retrieves the cache expiration duration from the CACHE_EXPIRY_TIME + environment variable. If not set, falls back to the default CACHE_DURATION. + + Returns: + int: The cache expiration duration in seconds. + """ return int(os.getenv("CACHE_EXPIRY_TIME", CACHE_DURATION)) -def save_to_cache(cache_file, data, lock_file): +def save_to_cache(cache_file: str, data: dict, lock_file: str) -> None: + """Save data to a cache file with thread-safe file locking. + + This function saves the provided data to a JSON cache file along with a + timestamp. It uses file locking to ensure thread safety during writing. + + Args: + cache_file (str): Path to the cache file where data will be saved. + data (dict): The data to be cached. Must be JSON-serializable. + lock_file (str): Path to the lock file used for thread safety. 
+ + Note: + - Creates the cache directory if it doesn't exist + - Logs an error if saving fails but doesn't raise an exception + - The data is saved with a timestamp for expiration checking + """ try: os.makedirs(os.path.dirname(cache_file), exist_ok=True) with FileLock(lock_file): @@ -24,7 +47,25 @@ def save_to_cache(cache_file, data, lock_file): logging.error(f"Failed to save cache to {cache_file}: {e}") -def load_from_cache(cache_file, lock_file): +def load_from_cache(cache_file: str, lock_file: str) -> dict: + """Load data from a cache file with expiration checking. + + This function loads data from a JSON cache file if it exists and hasn't + expired. It uses file locking to ensure thread safety during reading. + + Args: + cache_file (str): Path to the cache file to load data from. + lock_file (str): Path to the lock file used for thread safety. + + Returns: + dict: The cached data if the cache exists and hasn't expired, + None otherwise. + + Note: + - Returns None if the cache file doesn't exist + - Returns None if the cached data has expired based on CACHE_EXPIRY_TIME + - Uses thread-safe file locking for reading + """ if os.path.exists(cache_file): with FileLock(lock_file): with open(cache_file, "r") as f: diff --git a/aixplain/utils/convert_datatype_utils.py b/aixplain/utils/convert_datatype_utils.py index 00dff186..08864cf6 100644 --- a/aixplain/utils/convert_datatype_utils.py +++ b/aixplain/utils/convert_datatype_utils.py @@ -25,6 +25,12 @@ def dict_to_metadata(metadatas: List[Union[Dict, MetaData]]) -> None: Args: metadatas (List[Union[Dict, MetaData]], optional): metadata of metadata information of the dataset. 
+ Returns: + None + + Raises: + TypeError: If one or more elements in the metadata_schema are not well-structured + """ try: for i in range(len(metadatas)): diff --git a/aixplain/utils/file_utils.py b/aixplain/utils/file_utils.py index 3eddb95d..1035943d 100644 --- a/aixplain/utils/file_utils.py +++ b/aixplain/utils/file_utils.py @@ -29,15 +29,25 @@ from pandas import DataFrame -def save_file(download_url: Text, download_file_path: Optional[Any] = None) -> Any: - """Download and save file from given URL +def save_file(download_url: Text, download_file_path: Optional[Union[str, Path]] = None) -> Union[str, Path]: + """Download and save a file from a given URL. + + This function downloads a file from the specified URL and saves it either + to a specified path or to a generated path in the 'aiXplain' directory. Args: - download_url (Text): URL of file to download - download_file_path (Any, optional): File path to save downloaded file. If None then generates a folder 'aiXplain' in current working directory. Defaults to None. + download_url (Text): URL of the file to download. + download_file_path (Optional[Union[str, Path]], optional): Path where the + downloaded file should be saved. If None, generates a folder 'aiXplain' + in the current working directory and saves the file there with a UUID + name. Defaults to None. Returns: - Text: Path where file was downloaded + Union[str, Path]: Path where the file was downloaded. + + Note: + If download_file_path is None, the file will be saved with a UUID name + and the original file extension in the 'aiXplain' directory. """ if download_file_path is None: save_dir = os.getcwd() @@ -51,7 +61,26 @@ def save_file(download_url: Text, download_file_path: Optional[Any] = None) -> A return download_file_path -def download_data(url_link, local_filename=None): +def download_data(url_link: str, local_filename: Optional[str] = None) -> str: + """Download a file from a URL with streaming support. 
+ + This function downloads a file from the specified URL using streaming to + handle large files efficiently. The file is downloaded in chunks to + minimize memory usage. + + Args: + url_link (str): URL of the file to download. + local_filename (Optional[str], optional): Local path where the file + should be saved. If None, uses the last part of the URL as the + filename. Defaults to None. + + Returns: + str: Path to the downloaded file. + + Raises: + requests.exceptions.RequestException: If the download fails or the + server returns an error status. + """ if local_filename is None: local_filename = url_link.split("/")[-1] with requests.get(url_link, stream=True) as r: @@ -74,24 +103,42 @@ def upload_data( content_encoding: Optional[Text] = None, nattempts: int = 2, return_download_link: bool = False, -): - """Upload files to S3 with pre-signed URLs +) -> str: + """Upload a file to S3 using pre-signed URLs with retry support. + + This function handles file uploads to S3 by first obtaining a pre-signed URL + from the aiXplain backend and then using it to upload the file. It supports + both temporary and permanent storage with optional metadata like tags and + license information. Args: - file_name (Union[Text, Path]): local path of file to be uploaded - tags (List[Text], optional): tags of the file - license (License, optional): the license for the file - is_temp (bool): specify if the file that will be upload is a temporary file - content_type (Text, optional): Type of content. Defaults to "text/csv". - content_encoding (Text, optional): Content encoding. Defaults to None. - nattempts (int, optional): Number of attempts for diminish the risk of exceptions. Defaults to 2. - return_download_link (bool, optional): If True, the function will return the download link instead of the presigned url. Defaults to False. 
- - Reference: - https://python.plainenglish.io/upload-files-to-aws-s3-using-pre-signed-urls-in-python-d3c2fcab1b41 + file_name (Union[Text, Path]): Local path of the file to upload. + tags (Optional[List[Text]], optional): List of tags to associate with + the file. Only used when is_temp is False. Defaults to None. + license (Optional[License], optional): License to associate with the file. + Only used when is_temp is False. Defaults to None. + is_temp (bool, optional): Whether to upload as a temporary file. + Temporary files have different handling and URL generation. + Defaults to True. + content_type (Text, optional): MIME type of the content being uploaded. + Defaults to "text/csv". + content_encoding (Optional[Text], optional): Content encoding of the file + (e.g., 'gzip'). Defaults to None. + nattempts (int, optional): Number of retry attempts for upload failures. + Defaults to 2. + return_download_link (bool, optional): If True, returns a direct download + URL instead of the S3 path. Defaults to False. Returns: - URL: s3 path + str: Either an S3 path (s3://bucket/key) or a download URL, depending + on return_download_link parameter. + + Raises: + Exception: If the upload fails after all retry attempts. + + Note: + The function will automatically retry failed uploads up to nattempts + times before raising an exception. """ try: # Get pre-signed URL @@ -158,15 +205,41 @@ def upload_data( def s3_to_csv( - s3_url: Text, aws_credentials: Optional[Dict[Text, Text]] = {"AWS_ACCESS_KEY_ID": None, "AWS_SECRET_ACCESS_KEY": None} -) -> Text: - """Convert s3 url to a csv file and download the file in `download_path` + s3_url: Text, + aws_credentials: Optional[Dict[Text, Text]] = {"AWS_ACCESS_KEY_ID": None, "AWS_SECRET_ACCESS_KEY": None} +) -> str: + """Convert S3 directory contents to a CSV file with file listings. + + This function takes an S3 URL and creates a CSV file containing listings + of all files in that location. 
It handles both single files and directories, + with special handling for directory structures. Args: - s3_url (Text): s3 url + s3_url (Text): S3 URL in the format 's3://bucket-name/path'. + aws_credentials (Optional[Dict[Text, Text]], optional): AWS credentials + dictionary with 'AWS_ACCESS_KEY_ID' and 'AWS_SECRET_ACCESS_KEY'. + If not provided or values are None, uses environment variables. + Defaults to {"AWS_ACCESS_KEY_ID": None, "AWS_SECRET_ACCESS_KEY": None}. Returns: - Path: path to csv file + str: Path to the generated CSV file. The file contains listings of + all files found in the S3 location. + + Raises: + Exception: If: + - boto3 is not installed + - Invalid S3 URL format + - AWS credentials are missing + - Bucket doesn't exist + - No files found + - Files are at bucket root + - Directory structure is invalid (unequal file counts or mismatched names) + + Note: + - The function requires the boto3 package to be installed + - The generated CSV will have a UUID as filename + - For directory structures, all subdirectories must have the same + number of files with matching prefixes """ try: import boto3 diff --git a/aixplain/utils/llm_utils.py b/aixplain/utils/llm_utils.py index 82fe3511..d9897380 100644 --- a/aixplain/utils/llm_utils.py +++ b/aixplain/utils/llm_utils.py @@ -10,11 +10,14 @@ def get_llm_instance( """Get an LLM instance with specific configuration. Args: - llm_id (Text): ID of the LLM model to use + llm_id (Text): ID of the LLM model to use. api_key (Optional[Text], optional): API key to use. Defaults to None. Returns: - LLM: Configured LLM instance + LLM: Configured LLM instance. + + Raises: + Exception: If the LLM model with the given ID is not found. 
""" try: llm = ModelFactory.get(llm_id, api_key=api_key) diff --git a/aixplain/utils/request_utils.py b/aixplain/utils/request_utils.py index 7b470b72..26f55a93 100644 --- a/aixplain/utils/request_utils.py +++ b/aixplain/utils/request_utils.py @@ -9,10 +9,10 @@ def _request_with_retry(method: Text, url: Text, **params) -> requests.Response: Args: method (Text): HTTP method, such as 'GET' or 'HEAD'. url (Text): The URL of the resource to fetch. - **params: Params to pass to request function + **params: Params to pass to request function. Returns: - requests.Response: Response object of the request + requests.Response: Response object of the request. """ session = requests.Session() retries = Retry(total=5, backoff_factor=0.1, status_forcelist=[500, 502, 503, 504]) diff --git a/aixplain/utils/validation_utils.py b/aixplain/utils/validation_utils.py index 03ed2231..4477238b 100644 --- a/aixplain/utils/validation_utils.py +++ b/aixplain/utils/validation_utils.py @@ -8,7 +8,19 @@ from aixplain.enums import DataSubtype -def _is_split(dsubtype: Union[Text, DataSubtype]): +def _is_split(dsubtype: Union[Text, DataSubtype]) -> bool: + """Check if a data subtype represents a split. + + Args: + dsubtype (Union[Text, DataSubtype]): The data subtype to check. Can be + either a string or a DataSubtype enum value. + + Returns: + bool: True if the subtype represents a split, False otherwise. + + Note: + The comparison is case-insensitive. + """ return str(dsubtype).lower() == "split" @@ -23,17 +35,50 @@ def dataset_onboarding_validation( split_rate: Optional[List[float]] = None, s3_link: Optional[str] = None, ) -> None: - """Dataset Onboard Validation + """Validate dataset parameters before onboarding. + + This function performs comprehensive validation of dataset parameters to ensure + they meet the requirements for onboarding. 
It checks: + - Input/output data type compatibility with the specified function + - Presence of required input data + - Validity of dataset splitting configuration + - Presence of content data source + Args: - input_schema (List[Union[Dict, MetaData]]): metadata of inputs - output_schema (List[Union[Dict, MetaData]]): metadata of outputs - function (Function): dataset function - input_ref_data (Dict[Text, Any], optional): reference to input data which is already in the platform. Defaults to {}. - metadata_schema (List[Union[Dict, MetaData]], optional): metadata of metadata information of the dataset. Defaults to []. - content_path (Union[Union[Text, Path], List[Union[Text, Path]]]): path to files which contain the data content - split_labels: (Optional[List[Text]]): The delimiters according which to split the dataset - split_rate: (Optional[List[float]]): the rate of spliting the dataset - s3_link (Optional[str]): s3 url to files or directories + input_schema (List[Union[Dict, MetaData]]): Metadata describing the input + data structure and types. + output_schema (List[Union[Dict, MetaData]]): Metadata describing the output + data structure and types. + function (Function): The function type that this dataset is designed for + (e.g., translation, transcription). + input_ref_data (Dict[Text, Any], optional): References to existing input + data in the platform. Defaults to {}. + metadata_schema (List[Union[Dict, MetaData]], optional): Additional metadata + describing the dataset. Defaults to []. + content_path (Union[Union[Text, Path], List[Union[Text, Path]]], optional): + Path(s) to local files containing the data. Defaults to []. + split_labels (Optional[List[Text]], optional): Labels for dataset splits + (e.g., ["train", "test"]). Must be provided with split_rate. + Defaults to None. + split_rate (Optional[List[float]], optional): Proportions for dataset splits + (e.g., [0.8, 0.2]). Must sum to 1.0 and match split_labels length. + Defaults to None. 
+ s3_link (Optional[str], optional): S3 URL to data files or directories.
+ Alternative to content_path. Defaults to None.
+
+ Raises:
+ AssertionError: If any validation fails:
+ - No input data specified
+ - Incompatible input/output types for function
+ - Invalid split configuration
+ - No content source provided
+ - Multiple split metadata entries
+ - Invalid split metadata type
+ - Mismatched split labels and rates
+
+ Note:
+ Either content_path or s3_link must be provided. If using splits,
+ both split_labels and split_rate must be provided.
 """
 metadata_spliting_schema = list(filter(lambda md: str(md.dsubtype) == "split", metadata_schema))

From 00b9c1f8a350b2200702d58b0e1577197185816c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ahmet=20G=C3=BCnd=C3=BCz?= <ahmet@aixplain.com>
Date: Wed, 13 Aug 2025 18:40:47 +0300
Subject: [PATCH 2/2] Update function_type.py

---
 aixplain/enums/function_type.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/aixplain/enums/function_type.py b/aixplain/enums/function_type.py
index 835ca306..f48f7f95 100644
--- a/aixplain/enums/function_type.py
+++ b/aixplain/enums/function_type.py
@@ -42,6 +42,7 @@ class FunctionType(Enum):
 CONNECTION (str): Connection function type.
 MCP_CONNECTION (str): MCP connection function type.
 MCPSERVER (str): MCP server function type.
+ MCPSERVERSAS (str): MCP server SaaS function type.
 """
 AI = "ai"
 SEGMENTOR = "segmentor"
@@ -53,4 +54,4 @@ class FunctionType(Enum):
 CONNECTION = "connection"
 MCP_CONNECTION = "mcpconnection"
 MCPSERVER = "mcpserver"
- MCPSERVERSAS = "mcpserversas" \ No newline at end of file
+ MCPSERVERSAS = "mcpserversas"