From 3f79795a15bd8adef6f710fa5fc7d69d252f0511 Mon Sep 17 00:00:00 2001 From: Lucas Pavanelli Date: Tue, 7 Nov 2023 18:51:22 -0300 Subject: [PATCH 1/6] Add parameters to FineTune --- .../__init__.py} | 89 ++++++++++++++++--- .../finetune_factory/hyperparameters.py | 17 ++++ aixplain/factories/finetune_factory/peft.py | 10 +++ aixplain/modules/finetune.py | 3 + 4 files changed, 107 insertions(+), 12 deletions(-) rename aixplain/factories/{finetune_factory.py => finetune_factory/__init__.py} (52%) create mode 100644 aixplain/factories/finetune_factory/hyperparameters.py create mode 100644 aixplain/factories/finetune_factory/peft.py diff --git a/aixplain/factories/finetune_factory.py b/aixplain/factories/finetune_factory/__init__.py similarity index 52% rename from aixplain/factories/finetune_factory.py rename to aixplain/factories/finetune_factory/__init__.py index 91708f4c..89e0d850 100644 --- a/aixplain/factories/finetune_factory.py +++ b/aixplain/factories/finetune_factory/__init__.py @@ -24,12 +24,15 @@ import logging from typing import Dict, List, Optional, Text import json +from aixplain.factories.finetune_factory.hyperparameters import Hyperparameters +from aixplain.factories.finetune_factory.peft import Peft from aixplain.modules.dataset import Dataset from aixplain.modules.model import Model from aixplain.modules.finetune import Finetune from aixplain.modules.finetune_cost import FinetuneCost from aixplain.utils import config from aixplain.utils.file_utils import _request_with_retry +import re from urllib.parse import urljoin @@ -57,9 +60,55 @@ def _create_cost_from_response(cls, response: Dict) -> FinetuneCost: """ return FinetuneCost(response["trainingCost"], response["inferenceCost"], response["hostingCost"]) + @classmethod + def _get_data_list(cls, dataset: Dataset): + flatten_target_values = [item for sublist in list(dataset.target_data.values()) for item in sublist] + data_list = list(dataset.source_data.values()) + flatten_target_values + return data_list + + @classmethod + def _validate_prompt(cls, prompt: Text, dataset_list: List[Dataset]) -> Text: + result_prompt = prompt + referenced_data = set(re.findall("<<(.+?)>>", prompt)) + for dataset in dataset_list: + data_list = cls._get_data_list(dataset) + for data in data_list: + if data.id in referenced_data: + result_prompt = result_prompt.replace(f"<<{data.id}>>", f"<<{data.name}>>") + referenced_data.remove(data.id) + referenced_data.add(data.name) + + # check if dataset list has same data name and it is referenced + name_set = set() + for dataset in dataset_list: + data_list = cls._get_data_list(dataset) + for data in data_list: + assert not ( + data.name in name_set and data.name in referenced_data + ), "Datasets must not have more than one referenced data with same name" + name_set.add(data.name) + + # check if all referenced data have a respective data in dataset list + for dataset in dataset_list: + data_list = cls._get_data_list(dataset) + for data in data_list: + if data.name in referenced_data: + result_prompt = result_prompt.replace(f"<<{data.name}>>", f"{{{data.name}}}") + referenced_data.remove(data.name) + assert len(referenced_data) == 0, "Referenced data are not present in dataset list" + return result_prompt + @classmethod def create( - cls, name: Text, dataset_list: List[Dataset], model: Model, train_percentage: float = 100, dev_percentage: float = 0 + cls, + name: Text, + dataset_list: List[Dataset], + model: Model, + prompt: Optional[Text] = None, + hyperparameters: Optional[Hyperparameters] = None, + 
peft: Optional[Peft] = None, + train_percentage: Optional[float] = 100, + dev_percentage: Optional[float] = 0, ) -> Finetune: """Create a Finetune object with the provided information. @@ -78,24 +127,40 @@ def create( assert ( train_percentage + dev_percentage <= 100 ), f"Create FineTune: Train percentage + dev percentage ({train_percentage + dev_percentage}) must be less than or equal to one" + if prompt is not None: + prompt = cls._validate_prompt(prompt, dataset_list) try: url = urljoin(cls.backend_url, f"sdk/finetune/cost-estimation") headers = {"Authorization": f"Token {cls.api_key}", "Content-Type": "application/json"} - payload = json.dumps( - { - "datasets": [ - {"datasetId": dataset.id, "trainPercentage": train_percentage, "devPercentage": dev_percentage} - for dataset in dataset_list - ], - "sourceModelId": model.id, - } - ) + payload = { + "datasets": [ + {"datasetId": dataset.id, "trainPercentage": train_percentage, "devPercentage": dev_percentage} + for dataset in dataset_list + ], + "sourceModelId": model.id, + } + parameters = {} + if prompt is not None: + parameters["prompt"] = prompt + if hyperparameters is not None: + parameters["hyperparameters"] = hyperparameters.to_dict() + if hyperparameters is not None: + parameters["peft"] = peft.to_dict() + payload["parameters"] = parameters logging.info(f"Start service for POST Create FineTune - {url} - {headers} - {json.dumps(payload)}") - r = _request_with_retry("post", url, headers=headers, data=payload) + r = _request_with_retry("post", url, headers=headers, json=payload) resp = r.json() logging.info(f"Response for POST Create FineTune - Status {resp}") cost = cls._create_cost_from_response(resp) - return Finetune(name, dataset_list, model, cost, train_percentage=train_percentage, dev_percentage=dev_percentage) + return Finetune( + name, + dataset_list, + model, + cost, + train_percentage=train_percentage, + dev_percentage=dev_percentage, + parameters=parameters, + ) except Exception: error_message = f"Create FineTune: Error with payload {json.dumps(payload)}" logging.exception(error_message) diff --git a/aixplain/factories/finetune_factory/hyperparameters.py b/aixplain/factories/finetune_factory/hyperparameters.py new file mode 100644 index 00000000..3a68a9d7 --- /dev/null +++ b/aixplain/factories/finetune_factory/hyperparameters.py @@ -0,0 +1,17 @@ +from dataclasses import dataclass +from dataclasses_json import dataclass_json + + +@dataclass_json +@dataclass +class Hyperparameters(object): + epochs: int = 4 + train_batch_size: int = 4 + eval_batch_size: int = 4 + learning_rate: float = 2e-5 + warmup_steps: int = 500 + generation_max_length: int = 225 + tokenizer_batch_size: int = 256 + gradient_checkpointing: bool = False + gradient_accumulation_steps: int = 1 + max_seq_length: int = 4096 diff --git a/aixplain/factories/finetune_factory/peft.py b/aixplain/factories/finetune_factory/peft.py new file mode 100644 index 00000000..d17efecf --- /dev/null +++ b/aixplain/factories/finetune_factory/peft.py @@ -0,0 +1,10 @@ +from dataclasses import dataclass +from dataclasses_json import dataclass_json + + +@dataclass_json +@dataclass +class Peft(object): + peft_lora_r: int = 8 + peft_lora_alpha: int = 32 + peft_lora_dropout: float = 0.05 diff --git a/aixplain/modules/finetune.py b/aixplain/modules/finetune.py index 3bc86ad7..ac5e43e5 100644 --- a/aixplain/modules/finetune.py +++ b/aixplain/modules/finetune.py @@ -64,6 +64,7 @@ def __init__( version: Text = "1.0", train_percentage: float = 100, dev_percentage: float = 0, + 
parameters: dict = None, **additional_info, ) -> None: """Create a FineTune with the necessary information. @@ -87,6 +88,7 @@ def __init__( self.cost = cost self.train_percentage = train_percentage self.dev_percentage = dev_percentage + self.parameters = parameters self.additional_info = additional_info self.backend_url = config.BACKEND_URL self.api_key = config.TEAM_API_KEY @@ -114,6 +116,7 @@ def start(self) -> Model: for dataset in self.dataset_list ], "sourceModelId": self.model.id, + "parameters": self.parameters, } ) logging.info(f"Start service for POST Start FineTune - {url} - {headers} - {json.dumps(payload)}") From b8973c16f2279a657d3293fbd10f74c7d2cb7f4f Mon Sep 17 00:00:00 2001 From: Lucas Pavanelli Date: Tue, 7 Nov 2023 20:37:06 -0300 Subject: [PATCH 2/6] Address PR comments and add finetune to modules folder --- .../factories/finetune_factory/__init__.py | 60 ++++----------- .../finetune_factory/prompt_validator.py | 41 ++++++++++ aixplain/modules/__init__.py | 1 - .../{finetune.py => finetune/__init__.py} | 74 ++++++++++++------- .../{finetune_cost.py => finetune/cost.py} | 0 .../finetune}/hyperparameters.py | 0 .../finetune}/peft.py | 0 7 files changed, 100 insertions(+), 76 deletions(-) create mode 100644 aixplain/factories/finetune_factory/prompt_validator.py rename aixplain/modules/{finetune.py => finetune/__init__.py} (69%) rename aixplain/modules/{finetune_cost.py => finetune/cost.py} (100%) rename aixplain/{factories/finetune_factory => modules/finetune}/hyperparameters.py (100%) rename aixplain/{factories/finetune_factory => modules/finetune}/peft.py (100%) diff --git a/aixplain/factories/finetune_factory/__init__.py b/aixplain/factories/finetune_factory/__init__.py index 89e0d850..b4963a0d 100644 --- a/aixplain/factories/finetune_factory/__init__.py +++ b/aixplain/factories/finetune_factory/__init__.py @@ -24,15 +24,15 @@ import logging from typing import Dict, List, Optional, Text import json -from aixplain.factories.finetune_factory.hyperparameters import Hyperparameters -from aixplain.factories.finetune_factory.peft import Peft +from aixplain.factories.finetune_factory.prompt_validator import validate_prompt +from aixplain.modules.finetune import Finetune +from aixplain.modules.finetune.cost import FinetuneCost +from aixplain.modules.finetune.hyperparameters import Hyperparameters +from aixplain.modules.finetune.peft import Peft from aixplain.modules.dataset import Dataset from aixplain.modules.model import Model -from aixplain.modules.finetune import Finetune -from aixplain.modules.finetune_cost import FinetuneCost from aixplain.utils import config from aixplain.utils.file_utils import _request_with_retry -import re from urllib.parse import urljoin @@ -60,44 +60,6 @@ def _create_cost_from_response(cls, response: Dict) -> FinetuneCost: """ return FinetuneCost(response["trainingCost"], response["inferenceCost"], response["hostingCost"]) - @classmethod - def _get_data_list(cls, dataset: Dataset): - flatten_target_values = [item for sublist in list(dataset.target_data.values()) for item in sublist] - data_list = list(dataset.source_data.values()) + flatten_target_values - return data_list - - @classmethod - def _validate_prompt(cls, prompt: Text, dataset_list: List[Dataset]) -> Text: - result_prompt = prompt - referenced_data = set(re.findall("<<(.+?)>>", prompt)) - for dataset in dataset_list: - data_list = cls._get_data_list(dataset) - for data in data_list: - if data.id in referenced_data: - result_prompt = result_prompt.replace(f"<<{data.id}>>", 
f"<<{data.name}>>") - referenced_data.remove(data.id) - referenced_data.add(data.name) - - # check if dataset list has same data name and it is referenced - name_set = set() - for dataset in dataset_list: - data_list = cls._get_data_list(dataset) - for data in data_list: - assert not ( - data.name in name_set and data.name in referenced_data - ), "Datasets must not have more than one referenced data with same name" - name_set.add(data.name) - - # check if all referenced data have a respective data in dataset list - for dataset in dataset_list: - data_list = cls._get_data_list(dataset) - for data in data_list: - if data.name in referenced_data: - result_prompt = result_prompt.replace(f"<<{data.name}>>", f"{{{data.name}}}") - referenced_data.remove(data.name) - assert len(referenced_data) == 0, "Referenced data are not present in dataset list" - return result_prompt - @classmethod def create( cls, @@ -116,9 +78,11 @@ def create( name (Text): Name of the Finetune. dataset_list (List[Dataset]): List of Datasets to be used for fine-tuning. model (Model): Model to be fine-tuned. + prompt (Text, optional): Fine-tuning prompt. Defaults to None. + hyperparameters (Hyperparameters, optional): Hyperparameters for fine-tuning. Defaults to None. + peft (Peft, optional): PEFT (Parameter-Efficient Fine-Tuning) configuration. Defaults to None. train_percentage (float, optional): Percentage of training samples. Defaults to 100. dev_percentage (float, optional): Percentage of development samples. Defaults to 0. - Returns: Finetune: The Finetune object created with the provided information or None if there was an error. """ @@ -128,7 +92,7 @@ def create( train_percentage + dev_percentage <= 100 ), f"Create FineTune: Train percentage + dev percentage ({train_percentage + dev_percentage}) must be less than or equal to one" if prompt is not None: - prompt = cls._validate_prompt(prompt, dataset_list) + prompt = validate_prompt(prompt, dataset_list) try: url = urljoin(cls.backend_url, f"sdk/finetune/cost-estimation") headers = {"Authorization": f"Token {cls.api_key}", "Content-Type": "application/json"} @@ -144,7 +108,7 @@ def create( parameters["prompt"] = prompt if hyperparameters is not None: parameters["hyperparameters"] = hyperparameters.to_dict() - if hyperparameters is not None: + if peft is not None: parameters["peft"] = peft.to_dict() payload["parameters"] = parameters logging.info(f"Start service for POST Create FineTune - {url} - {headers} - {json.dumps(payload)}") @@ -159,7 +123,9 @@ def create( cost, train_percentage=train_percentage, dev_percentage=dev_percentage, - parameters=parameters, + prompt=prompt, + hyperparameters=hyperparameters, + peft=peft, ) except Exception: error_message = f"Create FineTune: Error with payload {json.dumps(payload)}" diff --git a/aixplain/factories/finetune_factory/prompt_validator.py b/aixplain/factories/finetune_factory/prompt_validator.py new file mode 100644 index 00000000..2ed753e1 --- /dev/null +++ b/aixplain/factories/finetune_factory/prompt_validator.py @@ -0,0 +1,41 @@ +from typing import List, Text +from aixplain.modules.dataset import Dataset +import re + + +def _get_data_list(dataset: Dataset): + flatten_target_values = [item for sublist in list(dataset.target_data.values()) for item in sublist] + data_list = list(dataset.source_data.values()) + flatten_target_values + return data_list + + +def validate_prompt(prompt: Text, dataset_list: List[Dataset]) -> Text: + result_prompt = prompt + referenced_data = set(re.findall("<<(.+?)>>", prompt)) + for dataset 
in dataset_list: + data_list = _get_data_list(dataset) + for data in data_list: + if data.id in referenced_data: + result_prompt = result_prompt.replace(f"<<{data.id}>>", f"<<{data.name}>>") + referenced_data.remove(data.id) + referenced_data.add(data.name) + + # check if dataset list has same data name and it is referenced + name_set = set() + for dataset in dataset_list: + data_list = _get_data_list(dataset) + for data in data_list: + assert not ( + data.name in name_set and data.name in referenced_data + ), "Datasets must not have more than one referenced data with same name" + name_set.add(data.name) + + # check if all referenced data have a respective data in dataset list + for dataset in dataset_list: + data_list = _get_data_list(dataset) + for data in data_list: + if data.name in referenced_data: + result_prompt = result_prompt.replace(f"<<{data.name}>>", f"{{{data.name}}}") + referenced_data.remove(data.name) + assert len(referenced_data) == 0, "Referenced data are not present in dataset list" + return result_prompt diff --git a/aixplain/modules/__init__.py b/aixplain/modules/__init__.py index 5d58847b..0e5ac003 100644 --- a/aixplain/modules/__init__.py +++ b/aixplain/modules/__init__.py @@ -31,4 +31,3 @@ from .finetune import Finetune from .benchmark import Benchmark from .benchmark_job import BenchmarkJob -from .finetune_cost import FinetuneCost diff --git a/aixplain/modules/finetune.py b/aixplain/modules/finetune/__init__.py similarity index 69% rename from aixplain/modules/finetune.py rename to aixplain/modules/finetune/__init__.py index ac5e43e5..cf311f8e 100644 --- a/aixplain/modules/finetune.py +++ b/aixplain/modules/finetune/__init__.py @@ -20,17 +20,20 @@ Description: FineTune Class """ -from typing import List, Text +from typing import List, Text, Optional import logging -from aixplain.utils.file_utils import _request_with_retry import json from urllib.parse import urljoin -from aixplain.utils import config +from aixplain.modules.finetune.cost import FinetuneCost +from aixplain.modules.finetune.hyperparameters import Hyperparameters +from aixplain.modules.finetune.peft import Peft from aixplain.factories.model_factory import ModelFactory from aixplain.modules.asset import Asset from aixplain.modules.dataset import Dataset from aixplain.modules.model import Model -from aixplain.modules.finetune_cost import FinetuneCost + +from aixplain.utils import config +from aixplain.utils.file_utils import _request_with_retry class Finetune(Asset): @@ -47,6 +50,9 @@ class Finetune(Asset): version (Text): Version of the FineTune. train_percentage (float): Percentage of training samples. dev_percentage (float): Percentage of development samples. + prompt (Text): Fine-tuning prompt. + hyperparameters (Hyperparameters): Hyperparameters for fine-tuning. + peft (Peft): PEFT (Parameter-Efficient Fine-Tuning) configuration. additional_info (dict): Additional information to be saved with the FineTune. backend_url (str): URL of the backend. api_key (str): The TEAM API key used for authentication. 
@@ -58,13 +64,15 @@ def __init__( dataset_list: List[Dataset], model: Model, cost: FinetuneCost, - id: Text = "", - description: Text = "", - supplier: Text = "aiXplain", - version: Text = "1.0", - train_percentage: float = 100, - dev_percentage: float = 0, - parameters: dict = None, + id: Optional[Text] = "", + description: Optional[Text] = "", + supplier: Optional[Text] = "aiXplain", + version: Optional[Text] = "1.0", + train_percentage: Optional[float] = 100, + dev_percentage: Optional[float] = 0, + prompt: Optional[Text] = None, + hyperparameters: Optional[Hyperparameters] = None, + peft: Optional[Peft] = None, **additional_info, ) -> None: """Create a FineTune with the necessary information. @@ -80,6 +88,9 @@ def __init__( version (Text, optional): Version of the FineTune. Defaults to "1.0". train_percentage (float, optional): Percentage of training samples. Defaults to 100. dev_percentage (float, optional): Percentage of development samples. Defaults to 0. + prompt (Text, optional): Fine-tuning prompt. Defaults to None. + hyperparameters (Hyperparameters, optional): Hyperparameters for fine-tuning. Defaults to None. + peft (Peft, optional): PEFT (Parameter-Efficient Fine-Tuning) configuration. Defaults to None. **additional_info: Additional information to be saved with the FineTune. """ super().__init__(id, name, description, supplier, version) @@ -88,7 +99,9 @@ def __init__( self.cost = cost self.train_percentage = train_percentage self.dev_percentage = dev_percentage - self.parameters = parameters + self.prompt = prompt + self.hyperparameters = hyperparameters + self.peft = peft self.additional_info = additional_info self.backend_url = config.BACKEND_URL self.api_key = config.TEAM_API_KEY @@ -104,23 +117,28 @@ def start(self) -> Model: try: url = urljoin(self.backend_url, f"sdk/finetune") headers = {"Authorization": f"Token {self.api_key}", "Content-Type": "application/json"} - payload = json.dumps( - { - "name": self.name, - "datasets": [ - { - "datasetId": dataset.id, - "trainSamplesPercentage": self.train_percentage, - "devSamplesPercentage": self.dev_percentage, - } - for dataset in self.dataset_list - ], - "sourceModelId": self.model.id, - "parameters": self.parameters, - } - ) + payload = { + "name": self.name, + "datasets": [ + { + "datasetId": dataset.id, + "trainSamplesPercentage": self.train_percentage, + "devSamplesPercentage": self.dev_percentage, + } + for dataset in self.dataset_list + ], + "sourceModelId": self.model.id, + } + parameters = {} + if self.prompt is not None: + parameters["prompt"] = self.prompt + if self.hyperparameters is not None: + parameters["hyperparameters"] = self.hyperparameters.to_dict() + if self.peft is not None: + parameters["peft"] = self.peft.to_dict() + payload["parameters"] = parameters logging.info(f"Start service for POST Start FineTune - {url} - {headers} - {json.dumps(payload)}") - r = _request_with_retry("post", url, headers=headers, data=payload) + r = _request_with_retry("post", url, headers=headers, json=payload) resp = r.json() logging.info(f"Response for POST Start FineTune - Name: {self.name} / Status {resp}") return ModelFactory().get(resp["id"]) diff --git a/aixplain/modules/finetune_cost.py b/aixplain/modules/finetune/cost.py similarity index 100% rename from aixplain/modules/finetune_cost.py rename to aixplain/modules/finetune/cost.py diff --git a/aixplain/factories/finetune_factory/hyperparameters.py b/aixplain/modules/finetune/hyperparameters.py similarity index 100% rename from 
aixplain/factories/finetune_factory/hyperparameters.py rename to aixplain/modules/finetune/hyperparameters.py diff --git a/aixplain/factories/finetune_factory/peft.py b/aixplain/modules/finetune/peft.py similarity index 100% rename from aixplain/factories/finetune_factory/peft.py rename to aixplain/modules/finetune/peft.py From 8a03a8ece503088bd9c997f0766ca075299c77bc Mon Sep 17 00:00:00 2001 From: Lucas Pavanelli Date: Thu, 9 Nov 2023 19:20:59 -0300 Subject: [PATCH 3/6] Add dataclasses-json dependency --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 4ec45221..ab7b901e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,7 +50,8 @@ dependencies = [ "validators>=0.20.0", "filetype>=1.2.0", "click>=8.1.7", - "PyYAML>=6.0.1" + "PyYAML>=6.0.1", + "dataclasses-json==0.6.1" ] [project.urls] From 634642431bbd5a755531c77edb9ba55fa05bec05 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira Date: Mon, 13 Nov 2023 21:03:55 +0000 Subject: [PATCH 4/6] Model Removal service M-5376311900 --- aixplain/factories/model_factory.py | 24 ++++++++++++++++++- .../general_assets/asset_functional_test.py | 5 ++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/aixplain/factories/model_factory.py b/aixplain/factories/model_factory.py index 2e04bf97..4970a0ed 100644 --- a/aixplain/factories/model_factory.py +++ b/aixplain/factories/model_factory.py @@ -90,7 +90,7 @@ def get(cls, model_id: Text, api_key: Optional[Text] = None) -> Model: headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} else: headers = {"Authorization": f"Token {cls.api_key}", "Content-Type": "application/json"} - logging.info(f"Start service for GET Metric - {url} - {headers}") + logging.info(f"Start service for GET Model - {url} - {headers}") r = _request_with_retry("get", url, headers=headers) resp = r.json() # set api key @@ -379,3 +379,25 @@ def onboard_model(cls, model_id: Text, image_tag: Text, image_hash: Text, api_ke message = "Your onboarding request has been submitted to an aiXplain specialist for finalization. We will notify you when the process is completed." logging.info(message) return response + + @classmethod + def delete(cls, model_id: Text) -> None: + """Delete Model service + + Args: + model_id (Text): model ID + """ + try: + url = urljoin(cls.backend_url, f"sdk/models/{model_id}") + if cls.aixplain_key != "": + headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} + else: + headers = {"Authorization": f"Token {cls.api_key}", "Content-Type": "application/json"} + logging.info(f"Start service for DELETE Model - {url} - {headers}") + r = _request_with_retry("delete", url, headers=headers) + if r.status_code != 200: + raise Exception() + except Exception: + message = "Model Deletion Error: Make sure the model exists and you are the owner." 
+            logging.error(message)
+            raise Exception(f"{message}")
diff --git a/tests/functional/general_assets/asset_functional_test.py b/tests/functional/general_assets/asset_functional_test.py
index d8c79594..bb3b78b3 100644
--- a/tests/functional/general_assets/asset_functional_test.py
+++ b/tests/functional/general_assets/asset_functional_test.py
@@ -68,3 +68,8 @@ def test_model_query():
     models = ModelFactory.list(query=query)["results"]
     for model in models:
         assert query in model.name
+
+
+def test_model_deletion():
+    with pytest.raises(Exception):
+        response = ModelFactory.delete("131312")

From 1a8410f5a52d6cb3211a69a3fe193b23940cb8b8 Mon Sep 17 00:00:00 2001
From: Thiago Castro Ferreira
Date: Mon, 13 Nov 2023 21:51:16 +0000
Subject: [PATCH 5/6] Move model deletion to the Model class

---
 aixplain/factories/model_factory.py          | 22 -------------------
 aixplain/modules/__init__.py                 |  2 +-
 aixplain/modules/model.py                    | 14 ++++++++++++
 .../general_assets/asset_functional_test.py  |  3 ++-
 4 files changed, 17 insertions(+), 24 deletions(-)

diff --git a/aixplain/factories/model_factory.py b/aixplain/factories/model_factory.py
index 4970a0ed..950cd25e 100644
--- a/aixplain/factories/model_factory.py
+++ b/aixplain/factories/model_factory.py
@@ -379,25 +379,3 @@ def onboard_model(cls, model_id: Text, image_tag: Text, image_hash: Text, api_ke
         message = "Your onboarding request has been submitted to an aiXplain specialist for finalization. We will notify you when the process is completed."
         logging.info(message)
         return response
-
-    @classmethod
-    def delete(cls, model_id: Text) -> None:
-        """Delete Model service
-
-        Args:
-            model_id (Text): model ID
-        """
-        try:
-            url = urljoin(cls.backend_url, f"sdk/models/{model_id}")
-            if cls.aixplain_key != "":
-                headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"}
-            else:
-                headers = {"Authorization": f"Token {cls.api_key}", "Content-Type": "application/json"}
-            logging.info(f"Start service for DELETE Model - {url} - {headers}")
-            r = _request_with_retry("delete", url, headers=headers)
-            if r.status_code != 200:
-                raise Exception()
-        except Exception:
-            message = "Model Deletion Error: Make sure the model exists and you are the owner."
- logging.error(message) - raise Exception(f"{message}") diff --git a/aixplain/modules/__init__.py b/aixplain/modules/__init__.py index 0e5ac003..0902eaf4 100644 --- a/aixplain/modules/__init__.py +++ b/aixplain/modules/__init__.py @@ -28,6 +28,6 @@ from .metric import Metric from .model import Model from .pipeline import Pipeline -from .finetune import Finetune +from .finetune import Finetune, FinetuneCost from .benchmark import Benchmark from .benchmark_job import BenchmarkJob diff --git a/aixplain/modules/model.py b/aixplain/modules/model.py index a92bde55..440181a1 100644 --- a/aixplain/modules/model.py +++ b/aixplain/modules/model.py @@ -279,3 +279,17 @@ def check_finetune_status(self): message = f"Status {status_code} - {message}" error_message = f"Check FineTune status Model: Error {message}" logging.exception(error_message) + + def delete(self) -> None: + """Delete Model service""" + try: + url = urljoin(self.backend_url, f"sdk/models/{self.id}") + headers = {"Authorization": f"Token {self.api_key}", "Content-Type": "application/json"} + logging.info(f"Start service for DELETE Model - {url} - {headers}") + r = _request_with_retry("delete", url, headers=headers) + if r.status_code != 200: + raise Exception() + except Exception: + message = "Model Deletion Error: Make sure the model exists and you are the owner." + logging.error(message) + raise Exception(f"{message}") diff --git a/tests/functional/general_assets/asset_functional_test.py b/tests/functional/general_assets/asset_functional_test.py index bb3b78b3..0c410df2 100644 --- a/tests/functional/general_assets/asset_functional_test.py +++ b/tests/functional/general_assets/asset_functional_test.py @@ -71,5 +71,6 @@ def test_model_query(): def test_model_deletion(): + model = ModelFactory.get("640b517694bf816d35a59125") with pytest.raises(Exception): - response = ModelFactory.delete("131312") + model.delete() From 985509e5752348cf21f61f794b1b55a24be51e16 Mon Sep 17 00:00:00 2001 From: Lucas Pavanelli Date: Tue, 14 Nov 2023 20:27:27 -0300 Subject: [PATCH 6/6] Update finetune functional test to run on all envs --- .../finetune/data/finetune_test_end2end.json | 12 ++++++++++++ .../finetune/data/finetune_test_list_data.json | 8 +------- .../finetune/finetune_functional_test.py | 17 +++++++++++------ 3 files changed, 24 insertions(+), 13 deletions(-) create mode 100644 tests/functional/finetune/data/finetune_test_end2end.json diff --git a/tests/functional/finetune/data/finetune_test_end2end.json b/tests/functional/finetune/data/finetune_test_end2end.json new file mode 100644 index 00000000..ae7e7bb5 --- /dev/null +++ b/tests/functional/finetune/data/finetune_test_end2end.json @@ -0,0 +1,12 @@ +[ + { + "model_name": "Chat GPT 3.5", + "dataset_name": "Test text generation dataset", + "inference_data": "Hello!" + }, + { + "model_name": "GPT2", + "dataset_name": "Test text generation dataset", + "inference_data": "Hello!" 
+ } +] \ No newline at end of file diff --git a/tests/functional/finetune/data/finetune_test_list_data.json b/tests/functional/finetune/data/finetune_test_list_data.json index 4f322075..b5b13a57 100644 --- a/tests/functional/finetune/data/finetune_test_list_data.json +++ b/tests/functional/finetune/data/finetune_test_list_data.json @@ -1,11 +1,5 @@ [ { - "function": "translation", - "source_language": {"language": "en", "dialect": ""}, - "target_language": {"language": "fr", "dialect": ""} - }, - { - "function": "speech-recognition", - "source_language": {"language": "en", "dialect": ""} + "function": "text-generation" } ] \ No newline at end of file diff --git a/tests/functional/finetune/finetune_functional_test.py b/tests/functional/finetune/finetune_functional_test.py index 62fe3892..f5143be6 100644 --- a/tests/functional/finetune/finetune_functional_test.py +++ b/tests/functional/finetune/finetune_functional_test.py @@ -24,13 +24,13 @@ from aixplain.factories import ModelFactory from aixplain.factories import DatasetFactory from aixplain.factories import FinetuneFactory -from aixplain.modules import FinetuneCost +from aixplain.modules.finetune.cost import FinetuneCost from aixplain.enums import Function, Language import pytest TIMEOUT = 20000.0 -RUN_FILE = "tests/functional/finetune/data/finetune_test_run_data.json" +RUN_FILE = "tests/functional/finetune/data/finetune_test_end2end.json" LIST_FILE = "tests/functional/finetune/data/finetune_test_list_data.json" @@ -47,9 +47,10 @@ def run_input_map(request): def list_input_map(request): return request.param -def test_run(run_input_map): - model = ModelFactory.get(run_input_map["model_id"]) - dataset_list = [DatasetFactory.get(run_input_map["dataset_id"])] + +def test_end2end_text_generation(run_input_map): + model = ModelFactory.list(query=run_input_map["model_name"], is_finetunable=True)["results"][0] + dataset_list = [DatasetFactory.list(query=run_input_map["dataset_name"])["results"][0]] finetune = FinetuneFactory.create(str(uuid.uuid4()), dataset_list, model) assert type(finetune.cost) is FinetuneCost cost_map = finetune.cost.to_dict() @@ -64,6 +65,10 @@ def test_run(run_input_map): assert status != "failed" end = time.time() assert finetune_model.check_finetune_status() == "onboarded" + result = finetune_model.run(run_input_map["inference_data"]) + assert result is not None + finetune_model.delete() + def test_list_finetunable_models(list_input_map): model_list = ModelFactory.list( @@ -71,5 +76,5 @@ def test_list_finetunable_models(list_input_map): source_languages=Language(list_input_map["source_language"]) if "source_language" in list_input_map else None, target_languages=Language(list_input_map["target_language"]) if "target_language" in list_input_map else None, is_finetunable=True, - ) + )["results"] assert len(model_list) > 0
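
Taken together, the series exposes prompt, hyperparameter, and PEFT settings on FineTune creation and moves model deletion onto the Model object. The sketch below shows the resulting API surface end to end; it is illustrative only — the FineTune name, the model/dataset queries, and the <<source>> column referenced by the prompt are assumptions (the queries mirror the functional test data), not values required by the SDK.

from aixplain.factories import DatasetFactory, FinetuneFactory, ModelFactory
from aixplain.modules.finetune.hyperparameters import Hyperparameters
from aixplain.modules.finetune.peft import Peft

# Pick a finetunable model and a dataset; the query strings mirror the
# functional test data and are assumptions, not fixed asset names.
model = ModelFactory.list(query="GPT2", is_finetunable=True)["results"][0]
dataset = DatasetFactory.list(query="Test text generation dataset")["results"][0]

finetune = FinetuneFactory.create(
    "my-finetune",
    [dataset],
    model,
    # <<...>> must reference a data id or name present in the dataset list;
    # validate_prompt rewrites it to {name} before the payload is sent.
    # A column named "source" is assumed here.
    prompt="Continue the text: <<source>>",
    hyperparameters=Hyperparameters(epochs=2, learning_rate=1e-5),
    peft=Peft(peft_lora_r=16, peft_lora_alpha=32),
    train_percentage=90,
    dev_percentage=10,
)
print(finetune.cost.to_dict())     # estimate from POST sdk/finetune/cost-estimation
finetune_model = finetune.start()  # POST sdk/finetune, returns a Model
# In practice, poll finetune_model.check_finetune_status() until "onboarded"
# (as the functional test does) before running inference or deleting.
finetune_model.delete()            # DELETE sdk/models/{id}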