## Installation setup

In [None]:
import os

# The Vertex AI Workbench Notebook product has specific requirements.
# Coerce the first operand to a real bool: the bare `and` expression would otherwise
# evaluate to None or "" (rather than False) when DL_ANACONDA_HOME is unset or empty.
IS_WORKBENCH_NOTEBOOK = bool(os.getenv("DL_ANACONDA_HOME")) and not os.getenv("VIRTUAL_ENV")

# True when the marker file /opt/deeplearning/metadata/env_version exists on this machine.
IS_USER_MANAGED_WORKBENCH_NOTEBOOK = os.path.exists("/opt/deeplearning/metadata/env_version")

In [None]:
!pip3 install --user -q --upgrade google-cloud-aiplatform \
                                  google-cloud-storage \
                                  tensorflow

Restart the kernel after the installation finishes so that the newly installed package versions are loaded.

## Set variables

In [None]:
# Replace the empty strings with the values for your own project and bucket.
PROJECT_ID = ""
BUCKET_NAME = ""
REGION = "us-central1"

# Cloud Storage URI derived from the bucket name above.
BUCKET_URI = "gs://" + BUCKET_NAME

In [None]:
# Sanity check: list the bucket at BUCKET_URI to confirm it exists and is reachable.
!gsutil ls -al $BUCKET_URI

# AutoML Tabular

In [None]:
import pandas as pd
import google.cloud.aiplatform as aip

In [None]:
# Initialise the Vertex AI SDK once for the whole notebook.
# `location` is passed explicitly so that editing REGION actually takes effect;
# without it the SDK falls back to its own default region.
aip.init(
    project=PROJECT_ID,
    location=REGION,
    staging_bucket=BUCKET_URI,
)

### Data preparation

Check whether the data meets the requirements of Vertex AI AutoML for tabular classification, and make changes if required.

Reference: https://cloud.google.com/vertex-ai/docs/tabular-data/classification-regression/prepare-data

In [None]:
# Hint: load the training CSV from its Cloud Storage path, then inspect the column
# names to compare them against the AutoML data requirements.
df_train = pd.read_csv(filepath_or_buffer="<GCS-path-of-training-data>")

df_train.columns

# Write your code here

In [None]:
# Path of the prepared CSV that is handed to the dataset-creation step.
IMPORT_FILE = "<GCS-path-of-csv-file-to-be-used-by-AutoML-Tabular-classification>"

# Preview the first five rows of the training frame.
df_train.head(n=5)

In [None]:
# Name of the target column; replace the placeholder before running the training cells.
label_column: str = "<name-of-target-column>"

### Create Vertex AI Dataset

In [None]:
# Register the prepared CSV (IMPORT_FILE) as a Vertex AI tabular dataset.
dataset = aip.TabularDataset.create(
    gcs_source=[IMPORT_FILE],
    display_name="income-bracket-prediction",
)

# The resource name identifies the dataset that was just created.
print(dataset.resource_name)

### Model training

In [None]:
# Fill appropriate values for the arguments below:
training_job = aip.AutoMLTabularTrainingJob(
                                            display_name="",
                                            optimization_prediction_type="<type-of-ML-problem>",
                                            optimization_objective="choose-your-metric",
                                           )

print(training_job)

In [None]:
# Fill appropriate values for the arguments below:
model = training_job.run(
                         dataset=,
                         model_display_name=,
                         training_fraction_split=,
                         validation_fraction_split=,
                         test_fraction_split=,
                         budget_milli_node_hours=,
                         disable_early_stopping=,
                         target_column=,
                       )

## Batch prediction

Data preparation: Repeat the preprocessing (if any) performed before model training.

In [None]:
# Load the test CSV from Cloud Storage.
df_test = pd.read_csv(filepath_or_buffer="<GCS-path-of-test-data>")

# Plain Python list of the test frame's column names.
columns_list = [*df_test.columns]

# Write your code here

In [None]:
# Fill appropriate values for below arguments..
batch_predict_job = model.batch_predict(
                                        job_display_name=,
                                        gcs_source=,  # csv filepath for inferencing
                                        gcs_destination_prefix=BUCKET_URI,
                                        instances_format="csv",
                                        predictions_format="jsonl",
                                        generate_explanation=True,
                                        sync=False,
                                       )

print(batch_predict_job)

## Model explainability

In [None]:
# The batch prediction job was submitted with sync=False, so block here until it has
# finished; its outputs can only be iterated once the job has completed.
batch_predict_job.wait()

# Iterable over the output objects the job wrote to Cloud Storage.
bp_iter_outputs = batch_predict_job.iter_outputs()

type(bp_iter_outputs)

Check the GCS bucket for any file(s) containing model explanations.

## Online prediction

Deploy to endpoint

In [None]:
# Deploy the trained model; the returned endpoint object is used for online prediction.
# Replace the placeholder with a real machine type before running.
endpoint = model.deploy(machine_type="<Choose-your-machine-type>")

Prepare/Send a payload for online prediction

In [None]:
# Send a single instance to the deployed endpoint.
# Fill in one value per feature before running; the keys are the feature names.
prediction = endpoint.predict(
    instances=[
        {
            "age": "",
            "workclass": "",
            "fnlwgt": "",
            "education": "",
            "education_num": "",
            "marital_status": "",
            "occupation": "",
            "relationship": "",
            "race": "",
            "sex": "",
            "capital_gain": "",
            "capital_loss": "",
            "hours_per_week": "",
            "native_country": "",
        }
    ]
)

print(prediction)