In [3]:
%load_ext lab_black


def deploy_model_with_kserve(
    project_name: str,
    model_version: int,
    explainer_type: str = None,
    kserve_version: str = "v1beta1",
    s3_bucket: str = "projects",
) -> str:
    """
    Deploys a model using KServe with a Triton predictor.

    The inference service is created in the default target namespace, or
    patched if a service with the same name already exists there. The function
    then watches the service for up to 180 seconds and returns its URL.

            Parameters:
                    project_name: Name of the project. Must be unique for the targeted namespace and conform Kubernetes naming conventions. Example: my-model.
                    model_version: Version of the deployed model. Relevant to match explainer version to model version. Falls back to 1 if it cannot be converted to an integer. Example: 1.
                    explainer_type: Type of Alibi explanation. If None, explanations are not provided. Example: AnchorTabular.
                    kserve_version: KServe API version. Example: v1beta1.
                    s3_bucket: Name of the s3 bucket in which model projects reside. Example: projects.
            Returns:
                    endpoint: REST endpoint where the model can be queried. Example: https://my-model-user-example-com.apps.myorg.com.
            Raises:
                    KeyError: If the service status carries no address URL once the watch has finished.
    """
    # NOTE(review): imports are kept function-local, presumably so that the
    # function stays self-contained (e.g. usable as a pipeline component) —
    # confirm before hoisting them to module level.
    from kubernetes import client, config
    from kserve import KServeClient
    from kserve import constants
    from kserve import utils
    from kserve import V1beta1AlibiExplainerSpec
    from kserve import V1beta1ExplainerSpec
    from kserve import V1beta1InferenceService
    from kserve import V1beta1InferenceServiceSpec
    from kserve import V1beta1PredictorSpec
    from kserve import V1beta1TritonSpec
    import logging
    import sys

    # `level` must be a logging level (int or level name); passing the
    # built-in `print` function here is rejected by the logging module.
    logging.basicConfig(
        stream=sys.stdout,
        level=logging.INFO,
        format="%(levelname)s %(asctime)s: %(message)s",
    )

    # The version may arrive as a string or as None; int() raises ValueError
    # for the former and TypeError for the latter, so both fall back to 1.
    try:
        model_version = int(model_version)
    except (TypeError, ValueError):
        logging.warning(
            "Could not parse model version. Continuing with default value 1..."
        )
        model_version = 1

    # See: https://www.kubeflow.org/docs/external-add-ons/kserve/first_isvc_kserve/
    print("Initializing environment...")
    config.load_incluster_config()
    namespace = utils.get_default_target_namespace()
    api_version = constants.KSERVE_GROUP + "/" + kserve_version
    storage_uri: str = f"s3://{s3_bucket}/{project_name}"

    print("Initializing inference service specification...")
    # The same resource envelope is shared by the predictor and the explainer.
    resources_spec = client.V1ResourceRequirements(
        requests={"cpu": "1000m", "memory": "8Gi"},
        limits={"cpu": "2000m", "memory": "16Gi"},
    )

    # See: https://kserve.github.io/website/master/sdk_docs/docs/V1beta1TritonSpec/
    triton_spec = V1beta1TritonSpec(
        args=["--strict-model-config=false"],
        runtime_version="22.03-py3",
        storage_uri=storage_uri,
        resources=resources_spec,
    )

    # See: https://kserve.github.io/website/master/sdk_docs/docs/V1beta1PredictorSpec/
    predictor_spec = V1beta1PredictorSpec(
        service_account_name="kserve-inference-sa", triton=triton_spec
    )

    # Stays None unless an explainer type was requested, so the service spec
    # below can always reference it.
    explainer_spec = None
    if explainer_type:
        print("Found an explainer, which will be co-deployed.")
        # See: https://kserve.github.io/website/master/sdk_docs/docs/V1beta1AlibiExplainerSpec/
        alibi_spec = V1beta1AlibiExplainerSpec(
            type=explainer_type,
            # Explainer artefacts are looked up per model version.
            storage_uri=f"{storage_uri}/explainer/{model_version}",
            resources=resources_spec,
        )

        # See: https://kserve.github.io/website/master/sdk_docs/docs/V1beta1ExplainerSpec/
        explainer_spec = V1beta1ExplainerSpec(
            min_replicas=1,
            alibi=alibi_spec,
        )

    # See: https://kserve.github.io/website/master/sdk_docs/docs/V1beta1InferenceServiceSpec/#properties
    inference_service_spec = V1beta1InferenceService(
        api_version=api_version,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=project_name,
            namespace=namespace,
            annotations={"sidecar.istio.io/inject": "false"},
        ),
        spec=V1beta1InferenceServiceSpec(
            predictor=predictor_spec,
            explainer=explainer_spec,
        ),
    )

    kserve_client = KServeClient()

    print("Checking for existing inference service...")
    # Only the lookup is guarded: a failed lookup is treated as "service does
    # not exist yet". Errors raised by patch/create themselves propagate
    # instead of being swallowed and followed by a second create attempt.
    try:
        inference_service = kserve_client.get(project_name, namespace=namespace)
    except Exception as lookup_error:
        logging.info(
            "Lookup of inference service failed, assuming it does not exist yet: %s",
            lookup_error,
        )
        print("Creating new inference service...")
        kserve_client.create(inference_service_spec)
    else:
        print(f"Received: {inference_service}")

        if "status" in inference_service:
            print("Inference service already exists.")

            print("Patching inference service with new model version...")
            kserve_client.patch(project_name, inference_service_spec)
        else:
            print("Creating inference service...")
            kserve_client.create(inference_service_spec)

    print("Waiting for inference service to start...")
    kserve_client.get(
        project_name, namespace=namespace, watch=True, timeout_seconds=180
    )

    print("Getting inference URL...")
    inference_response = kserve_client.get(project_name, namespace=namespace)
    # NOTE(review): this lookup raises KeyError when the status has no address
    # yet — presumably when the service is not ready after the watch; confirm.
    inference_url = inference_response["status"]["address"]["url"]
    print(f"inference URL: {inference_url}")

    print("Finished.")
    return inference_url

In [4]:
# Run parameters for this notebook. Only `model_name` is consumed in the
# visible cells; the remaining keys are presumably read elsewhere — TODO confirm.
arguments = dict(
    blackboard="artefacts",
    model_name="fraud-detection-ec1d0",
    cluster_configuration_secret="",
    training_gpus="1",
    training_node_selector="",
)

In [5]:
# Deploy version 1 of the model named in `arguments`; no explainer type is
# passed and the remaining parameters keep their defaults.
deploy_model_with_kserve(
    project_name=arguments["model_name"],
    model_version=1,
)

ImportError: cannot import name 'KServeClient' from 'kserve' (unknown location)