Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
1b67aed
Create bounds for FineTune hyperparameters (#103)
lucas-aixplain Jan 15, 2024
61359d8
Fixing pipeline general asset test (#106)
thiago-aixplain Jan 15, 2024
c293a69
Merge dev to test (#107)
lucas-aixplain Jan 16, 2024
572750c
Development to Test (#109)
thiago-aixplain Jan 17, 2024
adfe272
Merge to test (#111)
ikxplain Jan 24, 2024
8365218
Update Finetuner functional tests (#112)
lucas-aixplain Jan 29, 2024
43a2b00
Merge branch 'test' into development
thiago-aixplain Jan 29, 2024
846e300
Merge dev to test (#113)
lucas-aixplain Jan 29, 2024
359209a
Hf deployment test (#114)
mikelam-us-aixplain Jan 29, 2024
74af040
Hf deployment test (#115)
mikelam-us-aixplain Jan 29, 2024
ffddc00
Hf deployment test (#118)
mikelam-us-aixplain Jan 30, 2024
7adfddd
Hf deployment test (#117)
mikelam-us-aixplain Jan 30, 2024
367ad74
Merge branch 'main' into test
ikxplain Jan 31, 2024
4c5358c
Do not download textual URLs (#120)
thiago-aixplain Feb 1, 2024
cb14b5a
Enable api key parameter in data asset creation (#122)
thiago-aixplain Feb 5, 2024
607852e
Merge to test (#124)
ikxplain Feb 6, 2024
66a043b
Update Finetuner hyperparameters (#125)
lucas-aixplain Feb 7, 2024
3b7f8b7
Merge dev to test (#126)
lucas-aixplain Feb 7, 2024
3bc04d3
Add new LLMs finetuner models (mistral and solar) (#128)
lucas-aixplain Feb 13, 2024
5954b63
Merge dev to test (#129)
lucas-aixplain Feb 13, 2024
58855c1
Enabling dataset ID and model ID as parameters for finetuner creation…
thiago-aixplain Feb 15, 2024
426213e
Fix supplier representation of a model (#132)
thiago-aixplain Feb 16, 2024
d62ad83
Fixing indentation in documentation sample code (#134)
thiago-aixplain Feb 21, 2024
7593033
Merge to test (#135)
ikxplain Feb 28, 2024
2f107e7
Update FineTune unit and functional tests (#136)
lucas-aixplain Mar 1, 2024
b5112dc
Merge branch 'test' into development
lucas-aixplain Mar 1, 2024
e6f1f02
Merge dev to test (#137)
lucas-aixplain Mar 1, 2024
32179d6
Click fix (#140)
mikelam-us-aixplain Mar 5, 2024
501e27b
Merge branch 'main' into test
thiago-aixplain Mar 7, 2024
94cf5f0
Merge branch 'test' into development
lucas-aixplain Mar 12, 2024
e0e260c
Merge to test (#142)
ikxplain Mar 12, 2024
34861a3
M 5905660469 enhance benchmark job response (#145)
shreyasXplain Mar 18, 2024
6fc9646
M 5905660469 enhance benchmark job response (#146)
shreyasXplain Mar 19, 2024
b4e5b67
New pipeline functional tests (#143)
thiago-aixplain Mar 19, 2024
873cb6a
M 6107719447 check finetuner status (#133)
thiago-aixplain Mar 21, 2024
c2a2cfd
Update metric.py (#147)
shreyasXplain Mar 22, 2024
88ab478
Dev 2 Test version 0.2.11 (#149)
thiago-aixplain Mar 22, 2024
e4b076b
Fixing finetune checker test and doc (#150)
thiago-aixplain Mar 22, 2024
84bd827
Development to Test (#151)
thiago-aixplain Mar 22, 2024
451f309
Skip model onboard and deployment tests (#153)
thiago-aixplain Mar 25, 2024
8df2bd0
Saving/Update Pipeline Services (#154)
thiago-aixplain Mar 26, 2024
bdf33b1
Merge to test. (#156)
ikxplain Mar 27, 2024
28f3092
Do not run service mode tests for specific pipelines (#159)
thiago-aixplain Apr 3, 2024
b86cf45
Merge branch 'test' into development
thiago-aixplain Apr 3, 2024
d1debf5
Development 2 Test (#160)
thiago-aixplain Apr 3, 2024
b45f12e
Fix on Metadata (#162)
thiago-aixplain Apr 9, 2024
dc45128
Merge to test (#163)
ikxplain Apr 10, 2024
5d75317
Interval label new structures (#157)
thiago-aixplain Apr 15, 2024
b43e7ec
Development 2 Test - Release 0.2.11 (#165)
thiago-aixplain Apr 15, 2024
da84847
Merge branch 'main' into test
ikxplain Apr 16, 2024
e4b2b22
Support text labels (#167)
thiago-aixplain Apr 17, 2024
4ccaa36
SDK's UML diagram (#169)
thiago-aixplain Apr 19, 2024
b1997cf
Merge remote-tracking branch 'origin/development' into test
ikxplain Apr 29, 2024
d3da2d2
Merge remote-tracking branch 'origin/test' into test
ikxplain Apr 29, 2024
d66d7d9
Merge dev to test (#173)
lucas-aixplain May 2, 2024
9d6f009
Merge to test (#177)
ikxplain May 8, 2024
ff6e204
Merge to test (#183)
ikxplain May 28, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion aixplain/enums/data_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,4 @@ class DataType(Enum):
VIDEO = "video"

def __str__(self):
return self._value_
return self._value_
22 changes: 10 additions & 12 deletions aixplain/factories/model_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def _create_model_from_response(cls, response: Dict) -> Model:
response["name"],
supplier=response["supplier"],
api_key=response["api_key"],
pricing=response["pricing"],
cost=response["pricing"],
function=Function(response["function"]["id"]),
parameters=parameters,
is_subscribed=True if "subscription" in response else False,
Expand Down Expand Up @@ -404,9 +404,11 @@ def onboard_model(cls, model_id: Text, image_tag: Text, image_hash: Text, api_ke
message = "Your onboarding request has been submitted to an aiXplain specialist for finalization. We will notify you when the process is completed."
logging.info(message)
return response

@classmethod
def deploy_huggingface_model(cls, name: Text, hf_repo_id: Text, hf_token: Optional[Text] = "", api_key: Optional[Text] = None) -> Dict:
def deploy_huggingface_model(
cls, name: Text, hf_repo_id: Text, hf_token: Optional[Text] = "", api_key: Optional[Text] = None
) -> Dict:
"""Onboards and deploys a Hugging Face large language model.

Args:
Expand All @@ -433,20 +435,16 @@ def deploy_huggingface_model(cls, name: Text, hf_repo_id: Text, hf_token: Option
"sourceLanguage": "en",
},
"source": "huggingface",
"onboardingParams": {
"hf_model_name": model_name,
"hf_supplier": supplier,
"hf_token": hf_token
}
"onboardingParams": {"hf_model_name": model_name, "hf_supplier": supplier, "hf_token": hf_token},
}
response = _request_with_retry("post", deploy_url, headers=headers, json=body)
logging.debug(response.text)
response_dicts = json.loads(response.text)
return response_dicts

@classmethod
def get_huggingface_model_status(cls, model_id: Text, api_key: Optional[Text] = None):
"""Gets the on-boarding status of a Hugging Face model with ID MODEL_ID.
"""Gets the on-boarding status of a Hugging Face model with ID MODEL_ID.

Args:
model_id (Text): The model's ID as returned by DEPLOY_HUGGINGFACE_MODEL
Expand All @@ -466,6 +464,6 @@ def get_huggingface_model_status(cls, model_id: Text, api_key: Optional[Text] =
"status": response_dicts["status"],
"name": response_dicts["name"],
"id": response_dicts["id"],
"pricing": response_dicts["pricing"]
"pricing": response_dicts["pricing"],
}
return ret_dict
return ret_dict
40 changes: 39 additions & 1 deletion aixplain/factories/pipeline_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"""
import json
import logging
import os
from typing import Dict, List, Optional, Text, Union
from aixplain.enums.data_type import DataType
from aixplain.enums.function import Function
Expand Down Expand Up @@ -207,7 +208,7 @@ def list(
output_data_types = [output_data_types]
payload["inputDataTypes"] = [data_type.value for data_type in output_data_types]

logging.info(f"Start service for POST List Dataset - {url} - {headers} - {json.dumps(payload)}")
logging.info(f"Start service for POST List Pipeline - {url} - {headers} - {json.dumps(payload)}")
r = _request_with_retry("post", url, headers=headers, json=payload)
resp = r.json()

Expand All @@ -220,3 +221,40 @@ def list(
for pipeline in results:
pipelines.append(cls.__from_response(pipeline))
return {"results": pipelines, "page_total": page_total, "page_number": page_number, "total": total}

@classmethod
def create(cls, name: Text, pipeline: Union[Text, Dict], status: Text = "draft") -> Pipeline:
    """Create a new draft pipeline on the backend.

    Args:
        name (Text): Pipeline Name
        pipeline (Union[Text, Dict]): Pipeline as a Python dictionary or as the path to a JSON file
        status (Text, optional): Status of the pipeline. Currently only draft pipelines can be saved. Defaults to "draft".

    Raises:
        Exception: If `status` is not "draft", if `pipeline` is a path that does not exist or does
            not have a ".json" extension, or if loading the file / calling the backend fails.

    Returns:
        Pipeline: instance of the new pipeline
    """
    # Validate with explicit raises rather than `assert`: assertions are removed
    # when Python runs with -O, which would silently disable these checks.
    if status != "draft":
        raise Exception("Pipeline Creation Error: Currently just the creation of draft pipelines are supported.")
    if isinstance(pipeline, str):
        _, ext = os.path.splitext(pipeline)
        if not (os.path.exists(pipeline) and ext == ".json"):
            raise Exception("Pipeline Creation Error: Make sure the pipeline to be save is in a JSON file.")

    try:
        if isinstance(pipeline, str):
            # JSON files are UTF-8 by specification; do not depend on the locale default.
            with open(pipeline, encoding="utf-8") as f:
                pipeline = json.load(f)

        # prepare payload
        payload = {"name": name, "status": "draft", "architecture": pipeline}
        url = urljoin(cls.backend_url, "sdk/pipelines")
        headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"}
        # The headers carry the team API key, so they are deliberately kept out of the log.
        logging.info(f"Start service for POST Create Pipeline - {url} - {json.dumps(payload)}")
        r = _request_with_retry("post", url, headers=headers, json=payload)
        response = r.json()

        return Pipeline(response["id"], name, config.TEAM_API_KEY)
    except Exception as e:
        # Callers receive a plain Exception as before; the original error stays attached as the cause.
        raise Exception(e) from e
3 changes: 2 additions & 1 deletion aixplain/modules/asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def __init__(
version: Text = "1.0",
license: Optional[License] = None,
privacy: Privacy = Privacy.PRIVATE,
cost: float = 0,
cost: Optional[Union[Dict, float]] = None,
) -> None:
"""Create an Asset with the necessary information

Expand All @@ -46,6 +46,7 @@ def __init__(
description (Text): Description of the Asset
supplier (Union[Dict, Text, Supplier, int], optional): supplier of the asset. Defaults to "aiXplain".
version (Optional[Text], optional): asset version. Defaults to "1.0".
cost (Optional[Union[Dict, float]], optional): asset price. Defaults to None.
"""
self.id = id
self.name = name
Expand Down
3 changes: 3 additions & 0 deletions aixplain/modules/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def __init__(
file_extension: Optional[FileType] = None,
languages: List[Language] = [],
dsubtype: DataSubtype = DataSubtype.OTHER,
id: Optional[Text] = None,
**kwargs
) -> None:
"""MetaData Class
Expand All @@ -62,6 +63,7 @@ def __init__(
file_extension (Optional[FileType], optional): File extension (e.g. CSV, TXT, etc.). Defaults to None.
languages (List[Language], optional): List of languages which the data consists of. Defaults to [].
dsubtype (DataSubtype, optional): Data subtype (e.g., age, topic, race, split, etc.), used in datasets metadata. Defaults to Other.
id (Optional[Text], optional): Data ID. Defaults to None.
"""
self.name = name
if isinstance(dtype, str):
Expand Down Expand Up @@ -91,4 +93,5 @@ def __init__(
language = Language(language)
self.languages.append(language)
self.dsubtype = dsubtype
self.id = id
self.kwargs = kwargs
15 changes: 9 additions & 6 deletions aixplain/modules/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,10 @@ def __init__(
supplier (Text): author of the Metric
is_reference_required (bool): does the metric use reference
is_source_required (bool): does the metric use source
cost (float): cost of the metric
cost (float): price of the metric
normalization_options(list, [])
**additional_info: Any additional Metric info to be saved
"""


super().__init__(id, name, description="", supplier=supplier, version="1.0", cost=cost)
self.is_source_required = is_source_required
self.is_reference_required = is_reference_required
Expand All @@ -76,7 +74,7 @@ def __init__(

def __repr__(self) -> str:
return f"<Metric {self.name}>"

def add_normalization_options(self, normalization_options: List[str]):
"""Add a given set of normalization options to be used while benchmarking

Expand All @@ -85,7 +83,12 @@ def add_normalization_options(self, normalization_options: List[str]):
"""
self.normalization_options.append(normalization_options)

def run(self, hypothesis: Optional[Union[str, List[str]]]=None, source: Optional[Union[str, List[str]]]=None, reference: Optional[Union[str, List[str]]]=None):
def run(
self,
hypothesis: Optional[Union[str, List[str]]] = None,
source: Optional[Union[str, List[str]]] = None,
reference: Optional[Union[str, List[str]]] = None,
):
"""Run the metric to calculate the scores.

Args:
Expand All @@ -94,6 +97,7 @@ def run(self, hypothesis: Optional[Union[str, List[str]]]=None, source: Optional
reference (Optional[Union[str, List[str]]], optional): Can give a single reference or a list of references for metric calculation. Defaults to None.
"""
from aixplain.factories.model_factory import ModelFactory

model = ModelFactory.get(self.id)
payload = {
"function": self.function,
Expand All @@ -115,4 +119,3 @@ def run(self, hypothesis: Optional[Union[str, List[str]]]=None, source: Optional
reference = [[ref] for ref in reference]
payload["references"] = reference
return model.run(payload)

12 changes: 8 additions & 4 deletions aixplain/modules/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class Model(Asset):
function (Text, optional): model AI function. Defaults to None.
url (str): URL to run the model.
backend_url (str): URL of the backend.
pricing (Dict, optional): model price. Defaults to None.
**additional_info: Any additional Model info to be saved
"""

Expand All @@ -61,6 +62,7 @@ def __init__(
version: Optional[Text] = None,
function: Optional[Text] = None,
is_subscribed: bool = False,
cost: Optional[Dict] = None,
**additional_info,
) -> None:
"""Model Init
Expand All @@ -74,9 +76,10 @@ def __init__(
version (Text, optional): version of the model. Defaults to "1.0".
function (Text, optional): model AI function. Defaults to None.
is_subscribed (bool, optional): Is the user subscribed. Defaults to False.
cost (Dict, optional): model price. Defaults to None.
**additional_info: Any additional Model info to be saved
"""
super().__init__(id, name, description, supplier, version)
super().__init__(id, name, description, supplier, version, cost=cost)
self.api_key = api_key
self.additional_info = additional_info
self.url = config.MODELS_RUN_URL
Expand Down Expand Up @@ -264,6 +267,7 @@ def check_finetune_status(self, after_epoch: Optional[int] = None):
"""
from aixplain.enums.asset_status import AssetStatus
from aixplain.modules.finetune.status import FinetuneStatus

headers = {"x-api-key": self.api_key, "Content-Type": "application/json"}
resp = None
try:
Expand All @@ -274,15 +278,15 @@ def check_finetune_status(self, after_epoch: Optional[int] = None):
finetune_status = AssetStatus(resp["finetuneStatus"])
model_status = AssetStatus(resp["modelStatus"])
logs = sorted(resp["logs"], key=lambda x: float(x["epoch"]))

target_epoch = None
if after_epoch is not None:
logs = [log for log in logs if float(log["epoch"]) > after_epoch]
if len(logs) > 0:
target_epoch = float(logs[0]["epoch"])
elif len(logs) > 0:
target_epoch = float(logs[-1]["epoch"])

if target_epoch is not None:
log = None
for log_ in logs:
Expand All @@ -294,7 +298,7 @@ def check_finetune_status(self, after_epoch: Optional[int] = None):
log["trainLoss"] = log_["trainLoss"]
if log_["evalLoss"] is not None:
log["evalLoss"] = log_["evalLoss"]

status = FinetuneStatus(
status=finetune_status,
model_status=model_status,
Expand Down
31 changes: 31 additions & 0 deletions aixplain/modules/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,13 @@

import time
import json
import os
import logging
from aixplain.modules.asset import Asset
from aixplain.utils import config
from aixplain.utils.file_utils import _request_with_retry
from typing import Dict, Optional, Text, Union
from urllib.parse import urljoin


class Pipeline(Asset):
Expand Down Expand Up @@ -306,3 +308,32 @@ def run_async(
if resp is not None:
response["error"] = resp
return response

def update(self, pipeline: Union[Text, Dict]):
    """Update the architecture of this pipeline on the backend.

    Args:
        pipeline (Union[Text, Dict]): Pipeline as a Python dictionary or as the path to a JSON file

    Raises:
        Exception: If `pipeline` is a path that does not exist or does not have a ".json"
            extension, or if loading the file / calling the backend fails.
    """
    # Validate with an explicit raise rather than `assert`: assertions are removed
    # when Python runs with -O, which would silently disable this check.
    if isinstance(pipeline, str):
        _, ext = os.path.splitext(pipeline)
        if not (os.path.exists(pipeline) and ext == ".json"):
            raise Exception("Pipeline Update Error: Make sure the pipeline to be save is in a JSON file.")

    try:
        if isinstance(pipeline, str):
            # JSON files are UTF-8 by specification; do not depend on the locale default.
            with open(pipeline, encoding="utf-8") as f:
                pipeline = json.load(f)

        # prepare payload
        payload = {"name": self.name, "status": "draft", "architecture": pipeline}
        url = urljoin(config.BACKEND_URL, f"sdk/pipelines/{self.id}")
        headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"}
        # The headers carry the team API key, so they are deliberately kept out of the log.
        logging.info(f"Start service for PUT Update Pipeline - {url} - {json.dumps(payload)}")
        r = _request_with_retry("put", url, headers=headers, json=payload)
        response = r.json()
        logging.info(f"Pipeline {response['id']} Updated.")
    except Exception as e:
        # Callers receive a plain Exception as before; the original error stays attached as the cause.
        raise Exception(e) from e
4 changes: 2 additions & 2 deletions aixplain/processes/data_onboarding/onboard_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,11 @@ def process_data_files(
-1,
0,
)
if metadata.dtype in [DataType.AUDIO, DataType.IMAGE] or metadata.dsubtype == DataSubtype.INTERVAL:
if metadata.dtype in [DataType.AUDIO, DataType.IMAGE, DataType.LABEL] or metadata.dsubtype == DataSubtype.INTERVAL:
files, data_column_idx, start_column_idx, end_column_idx, nrows = process_media_files.run(
metadata=metadata, paths=paths, folder=folder
)
elif metadata.dtype in [DataType.TEXT, DataType.LABEL]:
elif metadata.dtype in [DataType.TEXT]:
files, data_column_idx, nrows = process_text_files.run(metadata=metadata, paths=paths, folder=folder)
return files, data_column_idx, start_column_idx, end_column_idx, nrows

Expand Down
18 changes: 18 additions & 0 deletions aixplain/processes/data_onboarding/process_media_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pandas as pd
import shutil
import tarfile
import validators

from aixplain.enums.data_subtype import DataSubtype
from aixplain.enums.data_type import DataType
Expand All @@ -16,6 +17,7 @@
from pathlib import Path
from tqdm import tqdm
from typing import List, Tuple
from urllib.parse import urlparse

AUDIO_MAX_SIZE = 50000000
IMAGE_TEXT_MAX_SIZE = 25000000
Expand Down Expand Up @@ -45,6 +47,11 @@ def run(metadata: MetaData, paths: List, folder: Path, batch_size: int = 100) ->
Returns:
Tuple[List[File], int, int, int]: list of s3 links; data, start and end columns index, and number of rows
"""
if metadata.dtype != DataType.LABEL:
assert (
metadata.storage_type != StorageType.TEXT
), f'Data Asset Onboarding Error: Column "{metadata.name}" of type "{metadata.dtype}" can not be stored in text.'

# if files are stored locally, create a folder to store it
media_folder = Path(".")
if metadata.storage_type == StorageType.FILE:
Expand Down Expand Up @@ -95,6 +102,10 @@ def run(metadata: MetaData, paths: List, folder: Path, batch_size: int = 100) ->
assert (
os.path.getsize(media_path) <= AUDIO_MAX_SIZE
), f'Data Asset Onboarding Error: Local audio file "{media_path}" exceeds the size limit of 50 MB.'
elif metadata.dtype == DataType.LABEL:
assert (
os.path.getsize(media_path) <= IMAGE_TEXT_MAX_SIZE
), f'Data Asset Onboarding Error: Local label file "{media_path}" exceeds the size limit of 25 MB.'
else:
assert (
os.path.getsize(media_path) <= IMAGE_TEXT_MAX_SIZE
Expand All @@ -105,6 +116,13 @@ def run(metadata: MetaData, paths: List, folder: Path, batch_size: int = 100) ->
shutil.copy2(media_path, new_path)
batch.append(fname)
else:
if metadata.storage_type == StorageType.TEXT and (
str(media_path).startswith("s3://")
or str(media_path).startswith("http://")
or str(media_path).startswith("https://")
or validators.url(media_path)
):
media_path = "DONOTDOWNLOAD" + str(media_path)
batch.append(media_path)

# crop intervals can not be used with interval data types
Expand Down
2 changes: 1 addition & 1 deletion aixplain/processes/data_onboarding/process_text_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def process_text(content: str, storage_type: StorageType) -> Text:
Text: textual content
"""
if storage_type == StorageType.FILE:
# Check the size of file and assert a limit of 50 MB
# Check the size of file and assert a limit of 25 MB
assert (
os.path.getsize(content) <= 25000000
), f'Data Asset Onboarding Error: Local text file "{content}" exceeds the size limit of 25 MB.'
Expand Down
Binary file added docs/assets/architecture.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading