Gpus #185 (Merged)

5 changes: 3 additions & 2 deletions aixplain/cli_groups.py
@@ -21,7 +21,7 @@
CLI Runner
"""
import click
from aixplain.factories.cli.model_factory_cli import list_host_machines, list_functions, create_asset_repo, asset_repo_login, onboard_model, deploy_huggingface_model, get_huggingface_model_status
from aixplain.factories.cli.model_factory_cli import list_host_machines, list_functions, create_asset_repo, asset_repo_login, onboard_model, deploy_huggingface_model, get_huggingface_model_status, list_gpus

@click.group('cli')
def cli():
@@ -51,11 +51,12 @@ def onboard():
create.add_command(create_asset_repo)
list.add_command(list_host_machines)
list.add_command(list_functions)
list.add_command(list_gpus)
get.add_command(asset_repo_login)
get.add_command(get_huggingface_model_status)
onboard.add_command(onboard_model)
onboard.add_command(deploy_huggingface_model)


def run_cli():
cli()
cli()
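
Not in this PR: a minimal sketch of how the newly wired "list gpus" path could be exercised with click's test runner, assuming the list group is registered under the top-level cli group as the existing add_command wiring suggests.

    # Sketch only: drive the new "gpus" subcommand through click's test runner.
    from click.testing import CliRunner
    from aixplain.cli_groups import cli

    runner = CliRunner()
    # Append "--api-key", "<TEAM_API_KEY>" if the key is not already set in the environment.
    result = runner.invoke(cli, ["list", "gpus"])
    print(result.output)  # YAML dump of the available GPUs and their prices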
52 changes: 37 additions & 15 deletions aixplain/factories/cli/model_factory_cli.py
@@ -44,7 +44,7 @@ def list_host_machines(api_key: Optional[Text] = None) -> None:
click.echo(ret_val_yaml)

@click.command("functions")
@click.option("--verbose", default=False,
@click.option("--verbose", is_flag=True,
help="List all function details, False by default.")
@click.option("--api-key", default=None,
help="TEAM_API_KEY if not already set in environment.")
@@ -62,21 +62,37 @@ def list_functions(verbose: bool, api_key: Optional[Text] = None) -> None:
ret_val_yaml = yaml.dump(ret_val)
click.echo(ret_val_yaml)

@click.command("gpus")
@click.option("--api-key", default=None,
help="TEAM_API_KEY if not already set in environment.")
def list_gpus(api_key: Optional[Text] = None) -> None:
"""CLI wrapper function for the LIST_GPUS function in ModelFactory.

Args:
api_key (Text, optional): Team API key. Defaults to None.
Returns:
None
"""
ret_val = ModelFactory.list_gpus(api_key)
ret_val_yaml = yaml.dump(ret_val)
click.echo(ret_val_yaml)

@click.command("image-repo")
@click.option("--name", help="Model name.")
@click.option("--hosting-machine",
help="Hosting machine code obtained from LIST_HOSTS.")
@click.option("--version", help="Model version.")
@click.option("--description", help="Description of model.")
@click.option("--function", help="Function name obtained from LIST_FUNCTIONS.")
@click.option("--source-language", default="en",
help="Model source language in 2-character 639-1 code or 3-character 639-3 code.")
@click.option("--input-modality", help="Input type (text, video, image, etc.)")
@click.option("--output-modality", help="Output type (text, video, image, etc.)")
@click.option("--documentation-url", default="", help="Link to model documentation.")
@click.option("--api-key", default=None,
help="TEAM_API_KEY if not already set in environment.")
def create_asset_repo(name: Text, hosting_machine: Text, version: Text,
description: Text, function: Text,
source_language: Text,
api_key: Optional[Text] = None) -> None:
def create_asset_repo(name: Text, description: Text, function: Text,
source_language: Text, input_modality: Text,
output_modality: Text,
documentation_url: Optional[Text] = "",
api_key: Optional[Text] = None) -> None:
"""CLI wrapper function for the CREATE_ASSET_REPO function in ModelFactory.

Args:
@@ -93,9 +109,10 @@ def create_asset_repo(name: Text, hosting_machine: Text, version: Text,
Returns:
None
"""
ret_val = ModelFactory.create_asset_repo(name, hosting_machine, version,
description, function,
source_language, api_key)
ret_val = ModelFactory.create_asset_repo(name, description, function,
source_language, input_modality,
output_modality, documentation_url,
api_key)
ret_val_yaml = yaml.dump(ret_val)
click.echo(ret_val_yaml)
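
Not in this PR: a hypothetical invocation of the reworked "image-repo" command with the new modality and documentation options; the command path and every option value below are illustrative placeholders, not taken from the diff.

    # Sketch only: option values are placeholders.
    from click.testing import CliRunner
    from aixplain.cli_groups import cli

    runner = CliRunner()
    result = runner.invoke(cli, [
        "create", "image-repo",
        "--name", "my-model",
        "--description", "Example model",
        "--function", "<function name from the functions command>",
        "--source-language", "en",
        "--input-modality", "text",
        "--output-modality", "text",
        "--documentation-url", "https://example.com/docs",
    ])
    print(result.output)  # YAML dump of the backend response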

@@ -119,8 +136,10 @@ def asset_repo_login(api_key: Optional[Text] = None) -> None:
@click.option("--model-id", help="Model ID from CREATE_IMAGE_REPO.")
@click.option("--image-tag", help="The tag of the image that you would like hosted.")
@click.option("--image-hash", help="The hash of the image you would like onboarded.")
@click.option("--host-machine", default="", help="The machine on which to host the model.")
@click.option("--api-key", default=None, help="TEAM_API_KEY if not already set in environment.")
def onboard_model(model_id: Text, image_tag: Text, image_hash: Text,
host_machine: Optional[Text] = "",
api_key: Optional[Text] = None) -> None:
"""CLI wrapper function for the ONBOARD_MODEL function in ModelFactory.

@@ -132,17 +151,20 @@ def onboard_model(model_id: Text, image_tag: Text, image_hash: Text,
Returns:
None
"""
ret_val = ModelFactory.onboard_model(model_id, image_tag, image_hash, api_key)
ret_val = ModelFactory.onboard_model(model_id, image_tag, image_hash,
host_machine, api_key)
ret_val_yaml = yaml.dump(ret_val)
click.echo(ret_val_yaml)

@click.command("hf-model")
@click.option("--name", help="User-defined name for Hugging Face model.")
@click.option("--hf-repo-id", help="Repository ID from Hugging Face in {supplier}/{model name} form.")
@click.option("--hf-token", help="Hugging Face token used to authenticate to this model.")
@click.option("--revision", default="", help="Commit hash of repository.")
@click.option("--hf-token", default=None, help="Hugging Face token used to authenticate to this model.")
@click.option("--api-key", default=None, help="TEAM_API_KEY if not already set in environment.")
def deploy_huggingface_model(name: Text, hf_repo_id: Text,
hf_token: Optional[Text] = None,
revision: Optional[Text] = None,
api_key: Optional[Text] = None) -> None:
"""CLI wrapper function for the DEPLOY_HUGGINGFACE_MODEL function in ModelFactory.

@@ -153,7 +175,7 @@ def deploy_huggingface_model(name: Text, hf_repo_id: Text,
Returns:
None
"""
ret_val = ModelFactory.deploy_huggingface_model(name, hf_repo_id, hf_token, api_key)
ret_val = ModelFactory.deploy_huggingface_model(name, hf_repo_id, revision, hf_token, api_key)
ret_val_yaml = yaml.dump(ret_val)
click.echo(ret_val_yaml)

@@ -172,4 +194,4 @@ def get_huggingface_model_status(model_id: Text, api_key: Optional[Text] = None)
"""
ret_val = ModelFactory.get_huggingface_model_status(model_id, api_key)
ret_val_yaml = yaml.dump(ret_val)
click.echo(ret_val_yaml)
click.echo(ret_val_yaml)
93 changes: 65 additions & 28 deletions aixplain/factories/model_factory.py
@@ -270,6 +270,25 @@ def list_host_machines(cls, api_key: Optional[Text] = None) -> List[Dict]:
for dictionary in response_dicts:
del dictionary["id"]
return response_dicts

@classmethod
def list_gpus(cls, api_key: Optional[Text] = None) -> List[List[Text]]:
"""List GPU names on which you can host your language model.

Args:
api_key (Text, optional): Team API key. Defaults to None.

Returns:
List[List[Text]]: List of all available GPUs and their prices.
"""
gpu_url = urljoin(config.BACKEND_URL, "sdk/model-onboarding/gpus")
if api_key:
headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"}
else:
headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"}
response = _request_with_retry("get", gpu_url, headers=headers)
response_list = json.loads(response.text)
return response_list
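
Not in this PR: the new classmethod can be called directly; a minimal sketch follows, passing api_key only if TEAM_API_KEY is not configured in the environment.

    # Sketch only: list the GPUs available for hosting.
    from aixplain.factories.model_factory import ModelFactory

    gpus = ModelFactory.list_gpus()
    for gpu in gpus:
        print(gpu)  # per the docstring, each entry pairs a GPU name with its price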

@classmethod
def list_functions(cls, verbose: Optional[bool] = False, api_key: Optional[Text] = None) -> List[Dict]:
@@ -310,12 +329,13 @@ def list_functions(verbose: Optional[bool] = False, api_key: Optional[Text]
def create_asset_repo(
cls,
name: Text,
hosting_machine: Text,
version: Text,
description: Text,
function: Text,
source_language: Text,
api_key: Optional[Text] = None,
input_modality: Text,
output_modality: Text,
documentation_url: Optional[Text] = "",
api_key: Optional[Text] = None
) -> Dict:
"""Creates an image repository for this model and registers it in the
platform backend.
@@ -342,27 +362,36 @@ def create_asset_repo(
function_id = function_dict["id"]
if function_id is None:
raise Exception("Invalid function name")
create_url = urljoin(config.BACKEND_URL, "sdk/models/register")
create_url = urljoin(config.BACKEND_URL, f"sdk/models/onboard")
logging.debug(f"URL: {create_url}")
if api_key:
headers = {"x-api-key": f"{api_key}", "Content-Type": "application/json"}
else:
headers = {"x-api-key": f"{config.TEAM_API_KEY}", "Content-Type": "application/json"}
always_on = False
is_async = False # Hard-coded to False for first release

payload = {
"name": name,
"hostingMachine": hosting_machine,
"alwaysOn": always_on,
"version": version,
"description": description,
"function": function_id,
"isAsync": is_async,
"sourceLanguage": source_language,
"model": {
"name": name,
"description": description,
"connectionType": [
"synchronous"
],
"function": function_id,
"modalities": [
f"{input_modality}-{output_modality}"
],
"documentationUrl": documentation_url,
"sourceLanguage": source_language
},
"source": "aixplain-ecr",
"onboardingParams": {
}
}
payload = json.dumps(payload)
logging.debug(f"Body: {str(payload)}")
response = _request_with_retry("post", create_url, headers=headers, data=payload)
response = _request_with_retry("post", create_url, headers=headers, json=payload)

assert response.status_code == 201

return response.json()
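
Not in this PR: a hypothetical call against the new create_asset_repo signature; all argument values are placeholders, and the shape of the returned dictionary is an assumption based on how onboard_model consumes its model ID.

    # Sketch only: register an image repository for a new model.
    from aixplain.factories.model_factory import ModelFactory

    repo = ModelFactory.create_asset_repo(
        name="my-model",
        description="Example model",
        function="<function name from list_functions>",
        source_language="en",
        input_modality="text",
        output_modality="text",
        documentation_url="https://example.com/docs",
    )
    model_id = repo.get("id")  # assumption: the response carries the new model's ID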

@classmethod
@@ -379,20 +408,23 @@ def asset_repo_login(cls, api_key: Optional[Text] = None) -> Dict:
login_url = urljoin(config.BACKEND_URL, "sdk/ecr/login")
logging.debug(f"URL: {login_url}")
if api_key:
headers = {"x-api-key": f"{api_key}", "Content-Type": "application/json"}
headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"}
else:
headers = {"x-api-key": f"{config.TEAM_API_KEY}", "Content-Type": "application/json"}
headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"}
response = _request_with_retry("post", login_url, headers=headers)
print(f"Response: {response}")
response_dict = json.loads(response.text)
return response_dict

@classmethod
def onboard_model(cls, model_id: Text, image_tag: Text, image_hash: Text, api_key: Optional[Text] = None) -> Dict:
def onboard_model(cls, model_id: Text, image_tag: Text, image_hash: Text, host_machine: Optional[Text] = "", api_key: Optional[Text] = None) -> Dict:
"""Onboard a model after its image has been pushed to ECR.

Args:
model_id (Text): Model ID obtained from CREATE_ASSET_REPO.
image_tag (Text): Image tag to be onboarded.
image_hash (Text): Image digest.
host_machine (Text, optional): Machine on which to host model.
api_key (Text, optional): Team API key. Defaults to None.
Returns:
Dict: Backend response
@@ -403,18 +435,18 @@ def onboard_model(cls, model_id: Text, image_tag: Text, image_hash: Text, api_ke
headers = {"x-api-key": f"{api_key}", "Content-Type": "application/json"}
else:
headers = {"x-api-key": f"{config.TEAM_API_KEY}", "Content-Type": "application/json"}
payload = {"image": image_tag, "sha": image_hash}
payload = json.dumps(payload)
payload = {"image": image_tag, "sha": image_hash, "hostMachine": host_machine}
logging.debug(f"Body: {str(payload)}")
response = _request_with_retry("post", onboard_url, headers=headers, data=payload)
message = "Your onboarding request has been submitted to an aiXplain specialist for finalization. We will notify you when the process is completed."
logging.info(message)
response = _request_with_retry("post", onboard_url, headers=headers, json=payload)
if response.status_code == 201:
message = "Your onboarding request has been submitted to an aiXplain specialist for finalization. We will notify you when the process is completed."
logging.info(message)
else:
message = "An error has occurred. Please make sure your model_id is valid and your host_machine, if set, is a valid option from the LIST_GPUS function."
return response
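
Not in this PR: a sketch of onboarding a pushed image on a GPU chosen from list_gpus; every identifier below is a placeholder.

    # Sketch only: onboard an image that has already been pushed to ECR.
    from aixplain.factories.model_factory import ModelFactory

    response = ModelFactory.onboard_model(
        model_id="<model id from create_asset_repo>",
        image_tag="v1.0",
        image_hash="sha256:<digest>",
        host_machine="<GPU name from list_gpus>",  # optional; defaults to ""
    )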

@classmethod
def deploy_huggingface_model(
cls, name: Text, hf_repo_id: Text, hf_token: Optional[Text] = "", api_key: Optional[Text] = None
) -> Dict:
def deploy_huggingface_model(cls, name: Text, hf_repo_id: Text, revision: Optional[Text] = "", hf_token: Optional[Text] = "", api_key: Optional[Text] = None) -> Dict:
"""Onboards and deploys a Hugging Face large language model.

Args:
@@ -441,7 +473,12 @@ def deploy_huggingface_model(
"sourceLanguage": "en",
},
"source": "huggingface",
"onboardingParams": {"hf_model_name": model_name, "hf_supplier": supplier, "hf_token": hf_token},
"onboardingParams": {
"hf_supplier": supplier,
"hf_model_name": model_name,
"hf_token": hf_token,
"revision": revision
}
}
response = _request_with_retry("post", deploy_url, headers=headers, json=body)
logging.debug(response.text)
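
Not in this PR: a sketch of deploying a Hugging Face model pinned via the new revision parameter; repository ID, revision, and token are placeholders.

    # Sketch only: deploy a Hugging Face model at a specific commit.
    from aixplain.factories.model_factory import ModelFactory

    response = ModelFactory.deploy_huggingface_model(
        name="my-llm",
        hf_repo_id="<supplier>/<model name>",
        revision="<commit hash>",  # optional; defaults to ""
        hf_token="<hf token>",     # needed for gated or private repositories
    )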
1 change: 0 additions & 1 deletion aixplain/modules/metric.py
@@ -97,7 +97,6 @@ def run(
reference (Optional[Union[str, List[str]]], optional): Can give a single reference or a list of references for metric calculation. Defaults to None.
"""
from aixplain.factories.model_factory import ModelFactory

model = ModelFactory.get(self.id)
payload = {
"function": self.function,
4 changes: 0 additions & 4 deletions aixplain/modules/model/__init__.py
@@ -267,7 +267,6 @@ def check_finetune_status(self, after_epoch: Optional[int] = None):
"""
from aixplain.enums.asset_status import AssetStatus
from aixplain.modules.finetune.status import FinetuneStatus

headers = {"x-api-key": self.api_key, "Content-Type": "application/json"}
resp = None
try:
@@ -278,15 +277,13 @@ def check_finetune_status(self, after_epoch: Optional[int] = None):
finetune_status = AssetStatus(resp["finetuneStatus"])
model_status = AssetStatus(resp["modelStatus"])
logs = sorted(resp["logs"], key=lambda x: float(x["epoch"]))

target_epoch = None
if after_epoch is not None:
logs = [log for log in logs if float(log["epoch"]) > after_epoch]
if len(logs) > 0:
target_epoch = float(logs[0]["epoch"])
elif len(logs) > 0:
target_epoch = float(logs[-1]["epoch"])

if target_epoch is not None:
log = None
for log_ in logs:
@@ -298,7 +295,6 @@ def check_finetune_status(self, after_epoch: Optional[int] = None):
log["trainLoss"] = log_["trainLoss"]
if log_["evalLoss"] is not None:
log["evalLoss"] = log_["evalLoss"]

status = FinetuneStatus(
status=finetune_status,
model_status=model_status,