In [3]:
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Experiments: Autologging

<table align="left">

  <td>
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/experiments/autologging.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> Run in Colab
    </a>
  </td>
  <td>
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/experiments/autologging.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      View on GitHub
    </a>
  </td>
  <td>
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/official/experiments/autologging.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo">
      Open in Vertex AI Workbench
    </a>
  </td>
</table>

**_NOTE_**: This notebook has been tested in the following environment:

* Python version = 3.9

## Overview

As part of the data science team, you want to try different modeling approaches during experimentation phase.To guarantee reproducibility, each approach has different parameters that you need to manually track This is a time consuming task. To address this challenge, Vertex AI SDK introduces autologging, a one-line code SDK capability which leverages MLflow to provide automatic metrics and parameters tracking associated with your  Vertex AI Experiments and experiment runs. Learn more about [Autologging data to an experiment run](https://cloud.google.com/vertex-ai/docs/experiments/autolog-data).

### Objective

In this tutorial, you learn how to use `Vertex AI Autologging`.

This tutorial uses the following Google Cloud ML services and resources:

- Vertex AI Experiments

The steps performed include:

- Enable autologging in the Vertex AI SDK.
- Train scikit-learn model and see the resulting experiment run with metrics and parameters autologged to Vertex AI Experiments without setting an experiment run.
- Train Tensorflow model, check autologged metrics and parameters to Vertex AI Experiments by manually setting an experiment run with `aiplatform.start_run()` and `aiplatform.end_run()`.
- Disable autologging in the Vertex AI SDK, train a PyTorch model and check that none of the parameters or metrics are logged.


### Dataset

The dataset is the [UCI Car Evaluation dataset](https://archive-beta.ics.uci.edu/dataset/19/car+evaluation), which is derived from simple hierarchical decision model and it contains attributions to predict car evaluation class.

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI Experiments
* Vertex AI Tensorboard
* Cloud Storage

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing),
and [Cloud Storage pricing](https://cloud.google.com/storage/pricing),
and use the [Pricing Calculator](https://cloud.google.com/products/calculator/)
to generate a cost estimate based on your projected usage.

## Installation

Install the following packages required to execute this notebook.

In [4]:
# Install the packages
USER = ""
! pip3 install {USER} --upgrade google-cloud-aiplatform tensorflow
! pip3 install {USER} --upgrade pandas scikit-learn category_encoders torch torchdata torchmetrics mlflow
! pip3 install {USER} --upgrade protobuf==3.20.3


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting google-cloud-aiplatform
  Downloading google_cloud_aiplatform-1.24.0-py2.py3-none-any.whl (2.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m43.4 MB/s[0m eta [36m0:00:00[0m
Collecting shapely<2.0.0
  Downloading Shapely-1.8.5.post1-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m83.7 MB/s[0m eta [36m0:00:00[0m
Collecting google-cloud-resource-manager<3.0.0dev,>=1.3.3
  Downloading google_cloud_resource_manager-1.10.0-py2.py3-none-any.whl (321 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m321.4/321.4 kB[0m [31m35.1 MB/s[0m eta [36m0:00:00[0m
Collecting packaging<22.0.0dev,>=14.3
  Downloading packaging-21.3-py3-none-any.whl (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.8/40.8 kB

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pandas
  Downloading pandas-2.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.4/12.4 MB[0m [31m61.5 MB/s[0m eta [36m0:00:00[0m
Collecting category_encoders
  Downloading category_encoders-2.6.0-py2.py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.2/81.2 kB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
Collecting torchmetrics
  Downloading torchmetrics-0.11.4-py3-none-any.whl (519 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.2/519.2 kB[0m [31m49.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting mlflow
  Downloading mlflow-2.3.0-py3-none-any.whl (17.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.7/17.7 MB[0m [31m67.8 MB/s[0m eta [36m0:00:00[0m
Collecting docker<7,>=4.0.0
  Downloading docke

### Colab only: Uncomment the following cell to restart the kernel.

In [5]:
#Automatically restart kernel after installs so that your environment can access the new packages
import IPython

app = IPython.Application.instance()
app.kernel.do_shutdown(True)

{'status': 'ok', 'restart': True}

## Before you begin

### Set up your Google Cloud project

**The following steps are required, regardless of your notebook environment.**

1. [Select or create a Google Cloud project](https://console.cloud.google.com/cloud-resource-manager). When you first create an account, you get a $300 free credit towards your compute/storage costs.

2. [Make sure that billing is enabled for your project](https://cloud.google.com/billing/docs/how-to/modify-project).

3. [Enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).

4. If you are running this notebook locally, you need to install the [Cloud SDK](https://cloud.google.com/sdk).

#### Set your project ID

**If you don't know your project ID**, try the following:
* Run `gcloud config list`.
* Run `gcloud projects list`.
* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)

In [1]:
PROJECT_ID = "gbk-poc-datamvp-poc1"  # @param {type:"string"}

# Set the project id
! gcloud config set project {PROJECT_ID}

Updated property [core/project].


#### Region

You can also change the `REGION` variable used by Vertex AI. Learn more about [Vertex AI regions](https://cloud.google.com/vertex-ai/docs/general/locations).

In [2]:
REGION = "europe-west4"  # @param {type: "string"}

#### UUID

If you are in a live tutorial session, you might be using a shared test account or project. To avoid name collisions between users on resources created, you create a uuid for each instance session, and append it onto the name of resources you create in this tutorial.

In [3]:
import random
import string


# Generate a uuid of length 8
def generate_uuid():
    return "".join(random.choices(string.ascii_lowercase + string.digits, k=8))


UUID = generate_uuid()

### Authenticate your Google Cloud account

Depending on your Jupyter environment, you may have to manually authenticate. Follow the relevant instructions below.

**3. Colab, uncomment and run:**

In [4]:
from google.colab import auth

auth.authenticate_user()

### Create a Cloud Storage bucket

Create a storage bucket to store intermediate artifacts such as datasets.

In [5]:
BUCKET_URI = "gs://bk-poc-datamvp-poc1-elenamatay"  # @param {type:"string"}

**Only if your bucket doesn't already exist**: Run the following cell to create your Cloud Storage bucket.

In [6]:
! gsutil mb -l $REGION -p $PROJECT_ID $BUCKET_URI

Creating gs://bk-poc-datamvp-poc1-elenamatay/...
ServiceException: 409 A Cloud Storage bucket named 'bk-poc-datamvp-poc1-elenamatay' already exists. Try another name. Bucket names must be globally unique across all Google Cloud projects, including those outside of your organization.


### Set up project template

Set the folder you use in this tutorial.

In [7]:
import os

tutorial_path = os.path.join(os.getcwd(), "sdk_autologging_tutorial")
data_path = os.path.join(tutorial_path, "data")

for path in tutorial_path, data_path:
    os.makedirs(path, exist_ok=True)

### Download dataset

Download the car evaluation dataset from the public Cloud Storage bucket.

In [8]:
from urllib import request

DATA_URL = "http://cloud-samples-data.storage.googleapis.com/vertex-ai/dataset-management/datasets/uci_car_eval/car_evaluation_preprocessed.csv"
data_filepath = os.path.join(data_path, "car_evaluation_data.csv")
request.urlretrieve(DATA_URL, data_filepath)

import pandas as pd

COLUMN_NAMES = ["buying", "maint", "doors", "persons", "lug_boot", "safety", "class"]
df = pd.read_csv(data_filepath)
df["class"] = df["class"].replace({"unacc": 0, "acc": 0, "good": 1, "vgood": 1})

processed_data_filepath = os.path.join(data_path, "car_evaluation_preprocessed.csv")
df.to_csv(processed_data_filepath, index=False)

In [9]:
!head {processed_data_filepath} -n 5

buying,maint,doors,persons,lug_boot,safety,class
vhigh,vhigh,2,2,small,low,0
vhigh,vhigh,2,2,small,med,0
vhigh,vhigh,2,2,small,high,0
vhigh,vhigh,2,2,med,low,0


### Import libraries

Import the Vertex AI SDK to log experiments in Vertex AI Experiments.

In [10]:
from google.cloud import aiplatform as vertex_ai

### Helper functions

To run experiments it is not uncommon to define experiment helpers, one per each modelling approach you plan to evaluate. Below you define the following experiment helpers:

*   `train_sklearn_model`: A helper function to train a Decision Tree model using Sklearn.
*   `train_tensorflow_model`: A helper function to train a simple model using Tensorflow.
*   `train_pytorch_model`: A helper function to train a simple neural network using PyTorch.


In [11]:
def set_seed(seed: int):
    """
    A function to set the seed for reproducibility.
    Args:
        seed: Seed to be set
    Returns:
        None
    """
    import random

    import numpy as np
    import tensorflow as tf
    import torch

    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    torch.manual_seed(seed)


def train_sklearn_model(data_path: str, test_size: int, max_depth: int):
    """
    A function to train a Decision Tree model using sklearn.
    Args:
        data_path: Path to the data
        test_size: Size of the test set
        max_depth: Maximum depth of the Decision Tree
    Returns:
        None
    """

    # Libraries
    import pandas as pd
    from category_encoders import OrdinalEncoder
    from sklearn.metrics import accuracy_score
    from sklearn.model_selection import train_test_split
    from sklearn.pipeline import Pipeline
    from sklearn.tree import DecisionTreeClassifier

    # Read data
    print("Reading data...")
    df = pd.read_csv(data_path)

    # Train, test split
    print("Generating train and test data...")
    x = df[["buying", "maint", "doors", "persons", "lug_boot", "safety"]]
    y = df[["class"]]
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, shuffle=True
    )

    # Build pipeline
    print("Building pipeline...")
    pipe = Pipeline(
        [
            ("encoder", OrdinalEncoder()),
            ("model", DecisionTreeClassifier(criterion="gini", max_depth=max_depth)),
        ]
    )

    # Train model
    print("Training model...")
    pipe.fit(x_train, y_train)

    # Evaluate model
    print("Evaluating model...")
    y_pred = pipe.predict(x_test)
    accuracy = accuracy_score(y_test, y_pred)
    print("accurancy", round(accuracy, 3))


def train_tensorflow_model(
    data_path: str, test_size: float, batch_size: int, epochs: int
):
    """
    A function to train a TF model.
    Args:
        data_path: Path to the data
        test_size: Size of the test set
        batch_size: Batch size
        epochs: Number of epochs
    Returns:
        None
    """
    # Libraries
    import tensorflow as tf

    # Variables
    dataset_size = 1729
    features_values = {
        "buying": ["vhigh", "high", "med", "low"],
        "maint": ["vhigh", "high", "med", "low"],
        "doors": ["2", "3", "4", "5more"],
        "persons": ["2", "4", "more"],
        "lug_boot": ["small", "med", "big"],
        "safety": ["low", "med", "high"],
    }

    # Helpers
    def get_input_layer(features_vocabulary):
        input_map = {}
        for cat_name, cat_values in features_vocabulary.items():
            input_map[cat_name] = tf.keras.Input(
                shape=(1,), name=cat_name, dtype="string"
            )
        return input_map

    def get_features_layer(inputs_map, features_vocabulary):
        features_map = {}
        for cat_name, cat_values in features_vocabulary.items():
            # Calculate categories
            cat_index = tf.keras.layers.StringLookup(
                vocabulary=cat_values, max_tokens=5
            )(inputs_map[cat_name])
            # Create encoding layer
            cat_layer = tf.keras.layers.CategoryEncoding(num_tokens=5)(cat_index)
            features_map[cat_name] = cat_layer
        return features_map

    # Read data
    print("Reading data...")
    car_dataset = tf.data.experimental.make_csv_dataset(
        data_path,
        column_names=[
            "buying",
            "maint",
            "doors",
            "persons",
            "lug_boot",
            "safety",
            "class",
        ],
        label_name="class",
        batch_size=batch_size,
    )

    # Generating Train, test split
    print("Generating train and test data...")
    train_size = int(0.8 * dataset_size)
    test_size = int(test_size * dataset_size)
    train_dataset = car_dataset.take(train_size)
    test_dataset = car_dataset.skip(train_size).take(test_size)

    # Build model
    print("Building model...")
    inputs_layer = get_input_layer(features_values)
    features_layer = get_features_layer(inputs_layer, features_values)
    x = tf.keras.layers.Concatenate()(features_layer.values())
    x = tf.keras.layers.Dense(10, activation="relu")(x)
    x = tf.keras.layers.Dense(5, activation="relu")(x)
    output_layer = tf.keras.layers.Dense(1)(x)
    model = tf.keras.Model(inputs=inputs_layer.values(), outputs=output_layer)

    # Compile model
    model.compile(
        optimizer="adam",
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=["accuracy"],
    )

    # Fit the model
    print("Training model...")
    model.fit(
        train_dataset,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=test_dataset,
    )

def train_xgboost_model(data_path: str, test_size: int, max_depth: int, n_estimators: int,  enable_categorical: bool=True):
    """
    A function to train an XGBoost model.
    Args:
        data_path: Path to the data
        test_size: Size of the test set
        max_depth: Maximum depth of the Decision Tree
        n_estimators: Number of trees
        enable_categorical: Whether to enable categorical features
    Returns:
        None
    """

    # Libraries
    import pandas as pd
    from xgboost import XGBClassifier
    from sklearn.metrics import accuracy_score
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import LabelEncoder

    # Read data
    print("Reading data...")
    df = pd.read_csv(data_path)

    # Convert categorical columns to numerical values
    for column in df.select_dtypes(include='object').columns:
        label_encoder = LabelEncoder()
        df[column] = label_encoder.fit_transform(df[column])

    # Train, test split
    print("Generating train and test data...")
    x = df[["buying", "maint", "doors", "persons", "lug_boot", "safety"]]
    y = df[["class"]]
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, shuffle=True
    )

    # Train model
    print("Training model...")
    clf = XGBClassifier(
        max_depth=max_depth, n_estimators=n_estimators, objective="binary:logistic"
    )
    clf.fit(x_train, y_train)

    # Evaluate model
    print("Evaluating model...")
    y_pred = clf.predict(x_test)
    accuracy = accuracy_score(y_test, y_pred)
    print("accuracy", round(accuracy, 3))


def train_pytorch_model(
    data_path: str, test_size: float, batch_size: int, lr: float, epochs: int, seed: int
):

    # Libraries
    import numpy as np
    import torch
    import torch.nn as nn
    import torchmetrics
    from torch.utils.data import DataLoader
    from torchdata import datapipes

    # Variables
    seed = 8
    features_map = {
        0: {"low": 0, "med": 1, "high": 2, "vhigh": 3},
        1: {"low": 0, "med": 1, "high": 2, "vhigh": 3},
        2: {"2": 0, "3": 1, "4": 2, "5more": 3},
        3: {"2": 0, "4": 1, "more": 2},
        4: {"small": 0, "med": 1, "big": 2},
        5: {"low": 0, "med": 1, "high": 2},
    }
    dataset_length = 1729

    # Helpers
    def row_processor(r):
        for i, value in enumerate(r[:-1]):
            r[i] = features_map[i][value]
        return {
            "data": np.array(r[:-1], dtype=np.float64),
            "labels": np.array(r[-1], dtype=np.float64),
        }

    # Model definition
    class SimpleNetwork(nn.Module):
        def __init__(self):
            super().__init__()
            self.linear_relu = nn.Sequential(
                nn.Linear(6, 12, dtype=torch.float64),
                nn.ReLU(),
                nn.Linear(12, 6, dtype=torch.float64),
                nn.ReLU(),
                nn.Linear(6, 3, dtype=torch.float64),
                nn.ReLU(),
                nn.Linear(3, 1, dtype=torch.float64),
            )

        def forward(self, x):
            logits = self.linear_relu(x)
            return logits

    # Read data
    print("Reading and preparing data...")
    read_dp = datapipes.iter.FileLister(data_path)
    open_dp = datapipes.iter.FileOpener(read_dp)
    parse_dp = datapipes.iter.CSVParser(open_dp, delimiter=",", skip_lines=1)
    train_dp, test_dp = datapipes.iter.RandomSplitter(
        parse_dp,
        weights={"train": 1 - test_size, "test": test_size},
        total_length=dataset_length,
        seed=seed,
    )
    map_train_dp = datapipes.iter.Mapper(train_dp, row_processor)
    map_test_dp = datapipes.iter.Mapper(test_dp, row_processor)
    train_dataloader = DataLoader(map_train_dp, batch_size=batch_size, shuffle=True)
    test_dataloader = DataLoader(map_test_dp, batch_size=batch_size, shuffle=False)

    # Build model
    print("Building model...")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = SimpleNetwork().to(device)
    loss_fn = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    # Train model
    print("Training model...")
    model.train()
    for t in range(epochs):
        batch = 0
        for row in iter(train_dataloader):
            features, labels = row["data"].to(device), row["labels"].to(device)
            train_predictions = model(features)
            train_prediction, _ = torch.max(train_predictions, 1)
            train_loss = loss_fn(train_prediction, labels)

            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()

            batch += 1
            print(f"Epoch {t + 1} - Batch {batch} - Loss {train_loss.item():.4f}")

    # Test model
    print("Evaluating model...")
    metric = torchmetrics.classification.BinaryAccuracy()
    metric_values = []
    model.eval()
    for t in range(epochs):
        batch = 0
        with torch.no_grad():
            for row in iter(test_dataloader):
                features, labels = row["data"].to(device), row["labels"].to(device)
                val_predictions = model(features)
                val_prediction, _ = torch.max(val_predictions, 1)
                metric.update(val_prediction, labels)
        accuracy = metric.compute()
        metric_values.append(accuracy)
        metric.reset()

        batch += 1
        print(f"Epoch {t + 1} - Batch {batch} - Accuracy {accuracy:.4f}")

### Initialize Vertex AI SDK for Python and set seed for reproducibility

Initialize the Vertex AI SDK for Python for your project and set seed to guarantee reproducibility.

In [12]:
vertex_ai.init(project=PROJECT_ID, location=REGION, staging_bucket=BUCKET_URI)
set_seed(8)

## Model experimentation using autologging with Vertex AI Experiments

Vertex AI Experiments Autologging allows you to run experiments and autologging parameters and metrics of different ML frameworks.

After initiating an Vertex AI Experiment, enable autologging using `vertex_ai.autolog()`.

There are two ways to use Autologging:

1.   *With automatic experiment run creation*
2.   *With user experiment run creation*

With *automatic experiment run creation*, you run an experiment. Vertex AI SDK automatically creates an experiment run by logging all paramenters and metrics in Vertex AI Experiments.

With *user experiment run creation*, you create an experiment using `vertex_ai.start_run(your-experiment-run-name)` and run the experiment. Then you get access to resulting paramentes and metrics after you end the experiment run with `vertex_ai.end_run()`


#### Create an experiment for tracking training parameters and metrics

To start, initiate an experiment using the `init()` method.

Because some model types like TensorFlow result in autologging time series metrics, you need to create a TensorBoard instance.

To create a TensorBoard instance, you can use `vertex_ai.Tensorboard.create()`.


<div class="alert alert-danger">Notice that if you did not activate yet, Vertex AI TensorBoard charges a monthly fee of $300 per unique active user. Learn more about [TensorBoard overview](https://cloud.google.com/vertex-ai/docs/experiments/tensorboard-overview). </div>


In [13]:
autologged_experiment_name = f"autologging-experiment-3modeltypes"

In [14]:
#experiment_tensorboard = vertex_ai.Tensorboard.create()
vertex_ai.init(
    project=PROJECT_ID,
    location=REGION,
    staging_bucket=BUCKET_URI,
    experiment=autologged_experiment_name,
#    experiment_tensorboard=experiment_tensorboard,
    experiment_description="autolog-experiment-with-automatic-run",
)

#### Autologging an experiment with automatic experiment run creation

In this section, Vertex AI SDK automatically creates an experiment run for you by logging all paramenters and training and post-training metrics in Vertex AI Experiments.


##### Enable autologging

First, enable autologging using `vertex_ai.autolog()` method.

After calling `vertex_ai.autolog()`, any metrics and parameters from
model training calls with supported ML frameworks will be automatically
logged to Vertex Experiments.

In [15]:
vertex_ai.autolog()

##### Run baseline experiment

Next, define your baseline model by running a Sklearn model experiment.

In [16]:
sklearn_config = dict(data_path=processed_data_filepath, test_size=0.2, max_depth=5)
train_sklearn_model(**sklearn_config)

Reading data...
Generating train and test data...
Building pipeline...
Training model...
Associating projects/903223461273/locations/europe-west4/metadataStores/default/contexts/autologging-experiment-3modeltypes-sklearn-2023-04-20-21-32-55-20aee to Experiment: autologging-experiment-3modeltypes


INFO:google.cloud.aiplatform.metadata.experiment_resources:Associating projects/903223461273/locations/europe-west4/metadataStores/default/contexts/autologging-experiment-3modeltypes-sklearn-2023-04-20-21-32-55-20aee to Experiment: autologging-experiment-3modeltypes


Evaluating model...
accurancy 0.934


##### Get the experiment results

Then, use the method `get_experiment_df()` to get the results of the experiment as a pandas dataframe.

Notice how all paramenters and metrics are logged in Vertex AI Experiments.

In particular, the `run_name` has been automatically assigned and the `accurancy_score` metrics you defined has been logged too.

In [17]:
experiment_df = vertex_ai.get_experiment_df()
experiment_df = experiment_df.T
experiment_df

Unnamed: 0,0,1,2,3
experiment_name,autologging-experiment-3modeltypes,autologging-experiment-3modeltypes,autologging-experiment-3modeltypes,autologging-experiment-3modeltypes
run_name,sklearn-2023-04-20-21-32-55-20aee,xgboost-2023-04-20-18-42-20-f4df0,tensorflow-2023-04-20-17-40-17-5becd,sklearn-2023-04-20-17-39-46-f4bb4
run_type,system.ExperimentRun,system.ExperimentRun,system.ExperimentRun,system.ExperimentRun
state,COMPLETE,COMPLETE,COMPLETE,COMPLETE
param.encoder__drop_invariant,False,,,False
...,...,...,...,...
metric.val_loss,,,1.198227,
time_series_metric.loss,,,1.197857,
time_series_metric.val_accuracy,,,0.922319,
time_series_metric.val_loss,,,1.198227,


#### Experiment 1 - XGBoost model

Preprocess: XGBoost can only work with data types of int, float, bool, or category. The columns buying, maint, doors, persons, lug_boot, and safety are all of type object, which is not compatible with XGBoost. 

To fix this, let's convert the data types of these columns to one of the compatible types. We can do this using the pandas.DataFrame.astype() method:



In [24]:
xgboost_config = dict(data_path=processed_data_filepath, test_size=0.2, max_depth=10, n_estimators=200)
train_xgboost_model(**xgboost_config)

Reading data...
Generating train and test data...
Training model...
Associating projects/903223461273/locations/europe-west4/metadataStores/default/contexts/autologging-experiment-3modeltypes-xgboost-2023-04-20-21-36-44-c2144 to Experiment: autologging-experiment-3modeltypes


INFO:google.cloud.aiplatform.metadata.experiment_resources:Associating projects/903223461273/locations/europe-west4/metadataStores/default/contexts/autologging-experiment-3modeltypes-xgboost-2023-04-20-21-36-44-c2144 to Experiment: autologging-experiment-3modeltypes


Evaluating model...
accuracy 0.986


#### Experiment 2 - TensorFlow model.

In [19]:
tf_config = dict(
    data_path=processed_data_filepath, test_size=0.2, batch_size=5, epochs=3
)
train_tensorflow_model(**tf_config)

Reading data...
Generating train and test data...
Building model...
Training model...
Associating projects/903223461273/locations/europe-west4/metadataStores/default/contexts/autologging-experiment-3modeltypes-tensorflow-2023-04-20-21-33-50-eb9c9 to Experiment: autologging-experiment-3modeltypes


INFO:google.cloud.aiplatform.metadata.experiment_resources:Associating projects/903223461273/locations/europe-west4/metadataStores/default/contexts/autologging-experiment-3modeltypes-tensorflow-2023-04-20-21-33-50-eb9c9 to Experiment: autologging-experiment-3modeltypes


Epoch 1/3
   6/1383 [..............................] - ETA: 17s - loss: 1.5425 - accuracy: 0.9000    



Epoch 2/3
Epoch 3/3


##### Compare experiment results

In [20]:
experiment_df = vertex_ai.get_experiment_df()
experiment_df.T

Unnamed: 0,0,1,2,3,4,5
experiment_name,autologging-experiment-3modeltypes,autologging-experiment-3modeltypes,autologging-experiment-3modeltypes,autologging-experiment-3modeltypes,autologging-experiment-3modeltypes,autologging-experiment-3modeltypes
run_name,tensorflow-2023-04-20-21-33-50-eb9c9,xgboost-2023-04-20-21-33-34-86379,sklearn-2023-04-20-21-32-55-20aee,xgboost-2023-04-20-18-42-20-f4df0,tensorflow-2023-04-20-17-40-17-5becd,sklearn-2023-04-20-17-39-46-f4bb4
run_type,system.ExperimentRun,system.ExperimentRun,system.ExperimentRun,system.ExperimentRun,system.ExperimentRun,system.ExperimentRun
state,COMPLETE,COMPLETE,COMPLETE,COMPLETE,COMPLETE,COMPLETE
param.epochs,3,,,,3,
...,...,...,...,...,...,...
metric.training_roc_auc,,,0.977436,,,0.977436
time_series_metric.loss,1.197857,,,,1.197857,
time_series_metric.val_accuracy,0.922319,,,,0.922319,
time_series_metric.val_loss,1.198227,,,,1.198227,
