In [None]:
!pip install ibm-watsonx-ai | tail -n 1
!pip install autoai-libs~=2.0 | tail -n 1
!pip install scikit-learn==1.3.* | tail -n 1
!pip install -U lale~=0.8.3 | tail -n 1
!pip install snapml==1.14.* | tail -n 1

Filter warnings for this notebook.

In [None]:
import warnings

warnings.filterwarnings('ignore')

In [None]:
from ibm_watsonx_ai.helpers import ContainerLocation, DataConnection

# Input side: a single connection to the data asset that is read further down.
training_data_references = [
    DataConnection(data_asset_id='9bd2900f-a225-4b9d-9cae-5235fe121ed6'),
]

# Output side: container paths for the run's result directory, the model
# archive and the training status file.
training_result_reference = DataConnection(
    location=ContainerLocation(
        path='auto_ml/c28684a6-0f07-4256-b9a9-3107e86b3248/wml_data/88d09a91-7701-4be7-965c-9e5259cb05f8/data/automl',
        model_location='auto_ml/c28684a6-0f07-4256-b9a9-3107e86b3248/wml_data/88d09a91-7701-4be7-965c-9e5259cb05f8/data/automl/model.zip',
        training_status='auto_ml/c28684a6-0f07-4256-b9a9-3107e86b3248/wml_data/88d09a91-7701-4be7-965c-9e5259cb05f8/training-status.json',
    ),
)

In [None]:
# Settings of the experiment this notebook reproduces. Later cells read
# 'deployment_url', 'project_id' and 'scoring' from it and pass the whole
# mapping to the data reader and to the model-store call.
experiment_metadata = {
    'prediction_type': 'binary',
    'prediction_column': 'IS_TENT',
    'holdout_size': 0.1,
    'scoring': 'accuracy',
    'csv_separator': ',',
    'random_state': 33,
    'max_number_of_estimators': 2,
    'training_data_references': training_data_references,
    'training_result_reference': training_result_reference,
    'deployment_url': 'https://au-syd.ml.cloud.ibm.com',
    'project_id': '8adc62db-0436-4df4-9acd-5a52185b6e22',
    'positive_label': 'TRUE',
    'drop_duplicates': True,
    'include_batched_ensemble_estimators': [],
    'feature_selector_mode': 'auto',
}

In [None]:
import os, ast
CPU_NUMBER = 4
if 'RUNTIME_HARDWARE_SPEC' in os.environ:
    CPU_NUMBER = int(ast.literal_eval(os.environ['RUNTIME_HARDWARE_SPEC'])['num_cpu'])

In [None]:
import getpass

# Read the key through a hidden prompt instead of hard-coding it in the notebook.
api_key = getpass.getpass(prompt="Please enter your api key (press enter): ")

In [None]:
from ibm_watsonx_ai import Credentials

# Combine the entered key with the endpoint stored in the experiment metadata.
credentials = Credentials(
    url=experiment_metadata['deployment_url'],
    api_key=api_key,
)

In [None]:
from ibm_watsonx_ai import APIClient

client = APIClient(credentials)

# Scope the client: use the project unless the metadata names a deployment
# space, in which case the space is selected instead.
if 'space_id' not in experiment_metadata:
    client.set.default_project(experiment_metadata['project_id'])
else:
    client.set.default_space(experiment_metadata['space_id'])

# Attach the client to the data connection that is read in the next section.
training_data_references[0].set_client(client)

<a id="inspection"></a>
# Pipeline inspection

In [None]:
# Read the training asset and request a train/holdout split driven by
# experiment_metadata.
X_train, X_test, y_train, y_test = training_data_references[0].read(
    experiment_metadata=experiment_metadata,
    with_holdout_split=True,
    use_flight=True,
)

In [None]:
from autoai_libs.transformers.exportable import NumpyColumnSelector
from autoai_libs.transformers.exportable import CompressStrings
from autoai_libs.transformers.exportable import NumpyReplaceMissingValues
from autoai_libs.transformers.exportable import NumpyReplaceUnknownValues
from autoai_libs.transformers.exportable import boolean2float
from autoai_libs.transformers.exportable import CatImputer
from autoai_libs.transformers.exportable import CatEncoder
import numpy as np
from autoai_libs.transformers.exportable import float32_transform
from sklearn.pipeline import make_pipeline
from autoai_libs.transformers.exportable import FloatStr2Float
from autoai_libs.transformers.exportable import NumImputer
from autoai_libs.transformers.exportable import OptStandardScaler
from sklearn.pipeline import make_union
from autoai_libs.transformers.exportable import NumpyPermuteArray
from snapml import SnapDecisionTreeClassifier

In [None]:
# Categorical branch: operates on the first five input columns (indices 0-4).
numpy_column_selector_0 = NumpyColumnSelector(columns=[0, 1, 2, 3, 4])
# dtypes_list and misslist_list each carry one entry per selected column;
# column 1 is tagged "float_int_num", the other four are "char_str".
compress_strings = CompressStrings(
    compress_type="hash",
    dtypes_list=[
        "char_str", "float_int_num", "char_str", "char_str", "char_str",
    ],
    missing_values_reference_list=["", "-", "?", float("nan")],
    misslist_list=[[], [], [], [], []],
)
# missing_values is empty here, so no specific values are listed for replacement.
numpy_replace_missing_values_0 = NumpyReplaceMissingValues(
    filling_values=float("nan"), missing_values=[]
)
# filling_values_list lines up with dtypes_list above: position 1 (the
# "float_int_num" column) uses 100001, the string columns use NaN.
# NOTE(review): 100001 looks like a generated sentinel value — confirm.
numpy_replace_unknown_values = NumpyReplaceUnknownValues(
    filling_values=float("nan"),
    filling_values_list=[
        float("nan"), 100001, float("nan"), float("nan"), float("nan"),
    ],
    missing_values_reference_list=["", "-", "?", float("nan")],
)
# NaN entries are imputed with the "most_frequent" strategy.
cat_imputer = CatImputer(
    missing_values=float("nan"),
    sklearn_version_family="1",
    strategy="most_frequent",
)
# Ordinal encoding with float64 output.
# NOTE(review): handle_unknown="error" presumably makes transform fail on
# categories not seen during fit — confirm against the autoai-libs docs.
cat_encoder = CatEncoder(
    dtype=np.float64,
    handle_unknown="error",
    sklearn_version_family="1",
    encoding="ordinal",
    categories="auto",
)
# Steps are applied in the order listed; float32_transform() runs last.
pipeline_0 = make_pipeline(
    numpy_column_selector_0,
    compress_strings,
    numpy_replace_missing_values_0,
    numpy_replace_unknown_values,
    boolean2float(),
    cat_imputer,
    cat_encoder,
    float32_transform(),
)
# Numeric branch: operates on input column 5 only.
numpy_column_selector_1 = NumpyColumnSelector(columns=[5])
# Single dtype entry ("float_num") for the single selected column.
float_str2_float = FloatStr2Float(
    dtypes_list=["float_num"], missing_values_reference_list=[]
)
# missing_values is empty here, so no specific values are listed for replacement.
numpy_replace_missing_values_1 = NumpyReplaceMissingValues(
    filling_values=float("nan"), missing_values=[]
)
# NaN entries are imputed with the "median" strategy.
num_imputer = NumImputer(missing_values=float("nan"), strategy="median")
# NOTE(review): use_scaler_flag=False presumably leaves values unscaled — confirm.
opt_standard_scaler = OptStandardScaler(use_scaler_flag=False)
# Steps are applied in the order listed; float32_transform() runs last.
pipeline_1 = make_pipeline(
    numpy_column_selector_1,
    float_str2_float,
    numpy_replace_missing_values_1,
    num_imputer,
    opt_standard_scaler,
    float32_transform(),
)
# make_union feeds the same input to both branches and concatenates their
# outputs column-wise (pipeline_0 output first, then pipeline_1).
union = make_union(pipeline_0, pipeline_1)
# permutation_indices lists 0..5 in ascending order.
# NOTE(review): this looks like an identity reordering of the six feature
# columns — confirm NumpyPermuteArray semantics (note axis=0).
numpy_permute_array = NumpyPermuteArray(
    axis=0, permutation_indices=[0, 1, 2, 3, 4, 5]
)
# n_jobs comes from CPU_NUMBER set earlier in the notebook; random_state=33
# matches the 'random_state' entry of experiment_metadata.
snap_decision_tree_classifier = SnapDecisionTreeClassifier(
    n_jobs=CPU_NUMBER, random_state=33
)


In [None]:
# Complete model: preprocessing union -> column permutation -> classifier.
pipeline = make_pipeline(
    union,
    numpy_permute_array,
    snap_decision_tree_classifier,
)

In [None]:
from sklearn.metrics import get_scorer

# Turn the metric name stored in the experiment settings into a scorer that
# is later called as scorer(estimator, X, y).
scoring_name = experiment_metadata['scoring']
scorer = get_scorer(scoring_name)

In [None]:
# Train on the underlying arrays with a flattened target vector; the trailing
# semicolon keeps the fitted-pipeline repr out of the cell output.
y_train_flat = y_train.values.ravel()
pipeline.fit(X_train.values, y_train_flat);

In [None]:
# Evaluate on the holdout split. The target is flattened with ravel(), just
# as in the fit cell, so the scorer always receives a 1-D label vector
# (a no-op when y_test.values is already 1-D).
score = scorer(pipeline, X_test.values, y_test.values.ravel())
print(score)

In [None]:
# Smoke-test inference on the first five holdout rows; the predictions are
# displayed as the cell's result.
sample_rows = X_test.values[:5]
pipeline.predict(sample_rows)

In [None]:
# Name under which the fitted pipeline is stored in the repository.
model_metadata = {
    client.repository.ModelMetaNames.NAME: 'P2 - Pretrained AutoAI pipeline',
}

# Store the pipeline, passing the experiment settings alongside the model.
stored_model_details = client.repository.store_model(
    model=pipeline,
    meta_props=model_metadata,
    experiment_metadata=experiment_metadata,
)

In [None]:
# Display what store_model returned for the saved pipeline.
stored_model_details