In [None]:
#install packages
pip install --user witwidget


In [None]:
import pandas as pd
import numpy as np
from witwidget.notebook.visualization import WitConfigBuilder
from witwidget.notebook.visualization import WitWidget
from sklearn.linear_model import LogisticRegression
import google.cloud.aiplatform as aiplatform


In [None]:
%%bigquery 
-- Preview 100 rows of the public census table, restricted to the columns
-- that the view defined later in this notebook also selects.
SELECT
  age,
  workclass,
  marital_status,
  education_num,
  occupation,
  hours_per_week,
  income_bracket
FROM
  `bigquery-public-data.ml_datasets.census_adult_income`
LIMIT
  100;

In [None]:
%%bigquery
-- Create (or replace) a view over the public census table that tags every row
-- with a split name in the `dataframe` column.
-- The split is driven by MOD(functional_weight, 10):
--   result < 8 -> 'training', 8 -> 'evaluation', 9 -> 'prediction'.
-- The CASE has no ELSE branch, so any other result yields NULL.
CREATE OR REPLACE VIEW
  `census.input_view` AS
SELECT
  age,
  workclass,
  marital_status,
  education_num,
  occupation,
  hours_per_week,
  income_bracket,
  CASE
    WHEN MOD(functional_weight, 10) < 8 THEN 'training'
    WHEN MOD(functional_weight, 10) = 8 THEN 'evaluation'
    WHEN MOD(functional_weight, 10) = 9 THEN 'prediction'
  END AS dataframe
FROM
  `bigquery-public-data.ml_datasets.census_adult_income`

In [None]:
%%bigquery
-- Train (or retrain) a BigQuery ML logistic regression on the 'training' rows
-- of census.input_view. income_bracket is the label column; the split column
-- `dataframe` is removed from the inputs via EXCEPT.
CREATE OR REPLACE MODEL
  `census.census_model`
OPTIONS
  ( model_type='LOGISTIC_REG',
    auto_class_weights=TRUE,
    input_label_cols=['income_bracket']
  ) AS
SELECT
  * EXCEPT(dataframe)
FROM
  `census.input_view`
WHERE
  dataframe = 'training'
    

In [None]:
%%bigquery input_df
-- Fetch the 'training' rows of the view (without the split column); the result
-- is stored under the name given on the magic line and used as input_df below.
SELECT * EXCEPT (dataframe) FROM `census.input_view` 
WHERE dataframe = 'training'

In [None]:
input_df.head()

In [None]:
# One-hot encode the listed columns of the training frame; all other columns pass through unchanged.
transform_df = pd.get_dummies(
    input_df,
    columns=['workclass', 'marital_status', 'occupation', 'education_num'],
)
# Show only the first rows rather than rendering the whole encoded frame.
transform_df.head()

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Separate the label column (y) from everything else (X).
y = transform_df["income_bracket"]
X = transform_df.drop(columns="income_bracket")

In [None]:
# Keep only the two columns the classifier below is trained on.
X = X.loc[:, ['hours_per_week', 'age']]

In [None]:
X.head()

In [None]:
y.head()

In [None]:
# Build the estimator with a fixed random_state, then fit it on the selected features and label.
clf = LogisticRegression(random_state=0)
clf = clf.fit(X, y)

In [None]:
clf.get_params()

In [None]:
# Record the training column names on the estimator as a plain Python list.
clf.feature_names = X.columns.tolist()

In [None]:
clf.feature_names

In [None]:
clf.feature_names_in_

In [None]:
clf.predict(X[:1])

In [None]:
import joblib
joblib.dump(clf, 'model.joblib')

In [None]:
# Reload the serialized model and predict with the reloaded copy,
# so the cell actually checks the dump/load round trip.
new_clf = joblib.load('model.joblib')
new_clf.predict(X[:1])

In [None]:
!gsutil cp model.joblib gs://felipe-sandbox/logistic_regression/

In [None]:
%%writefile explanation-metadata.json

{
  "inputs": {
    "features": {"index_feature_mapping": ["hours_per_week","age"], "encoding": "BAG_OF_FEATURES"}
  },
  "outputs": {
    "income_bracket": {
    }
  }
}

In [None]:
!gsutil ls gs://felipe-sandbox/logistic_regression

In [None]:
%%bash
# Upload the artifacts under gs://felipe-sandbox/logistic_regression as a Vertex AI model
# served by the sklearn-cpu.1-0 prediction image, with sampled-Shapley explanations
# (path count 10) described by explanation-metadata.json.
gcloud ai models upload \
  --region=us-central1 \
  --display-name=logistic_regression_xai_v3 \
  --container-image-uri=us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-0:latest \
  --artifact-uri=gs://felipe-sandbox/logistic_regression \
  --explanation-method=sampled-shapley \
  --explanation-path-count=10 \
  --explanation-metadata-file=explanation-metadata.json

In [None]:
import google.cloud.aiplatform as aip
aip.init(project="felipe-sandbox", staging_bucket="gs://felipe-sandbox")


In [None]:
REGION="us-central1"

In [None]:

# Explanation method to configure; accepted values are listed in the trailing comment.
XAI = "shapley"  # [ shapley, ig, xrai ]

# Translate the chosen method into the attribution parameters passed to ExplanationParameters.
if XAI == "shapley":
    PARAMETERS = {"sampled_shapley_attribution": {"path_count": 10}}
elif XAI == "ig":
    PARAMETERS = {"integrated_gradients_attribution": {"step_count": 50}}
elif XAI == "xrai":
    PARAMETERS = {"xrai_attribution": {"step_count": 50}}
else:
    # An unknown value used to leave PARAMETERS undefined and fail with a NameError below.
    raise ValueError(
        "Unsupported XAI method {!r}; expected 'shapley', 'ig' or 'xrai'".format(XAI)
    )

parameters = aip.explain.ExplanationParameters(PARAMETERS)

# Feature names, in the column order the classifier was trained on.
COLUMNS = [
    "hours_per_week",
    "age"
]
# Explanation metadata: one input named "features" whose positions map to COLUMNS,
# and one output named "income_bracket".
metadata = aip.explain.ExplanationMetadata(
    inputs={
        "features": {"index_feature_mapping": COLUMNS, "encoding": "BAG_OF_FEATURES"}
    },
    outputs={"income_bracket": {}},
)

MODEL_DIR = "felipe-sandbox" + "/model"

# Serving image version; set to the same sklearn-cpu.1-0 image the upload cells in this notebook use.
DEPLOY_VERSION = "sklearn-cpu.1-0"
# The image host prefix is the first component of the region name (e.g. "us" for "us-central1").
DEPLOY_IMAGE = "{}-docker.pkg.dev/vertex-ai/prediction/{}:latest".format(
    REGION.split("-")[0], DEPLOY_VERSION
)

In [None]:
# Register the artifacts stored in GCS as a Vertex AI model, attaching the
# explanation parameters and metadata objects built earlier in the notebook.
model = aip.Model.upload(
    display_name="logistic_regression_v4",
    artifact_uri="gs://felipe-sandbox/logistic_regression",
    serving_container_image_uri="us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-0:latest",
    explanation_parameters=parameters,
    explanation_metadata=metadata,
    # Upload is started without blocking; model.wait() below waits for it.
    sync=False,
)

# Block until the upload started above has finished.
model.wait()

In [None]:
# List the models registered in the project. `list` must be called:
# the bare attribute only displays the bound method's repr.
aip.Model.list()

In [None]:
# Display name given to the deployed model on the endpoint.
DEPLOYED_NAME = "logistic-regression-endpoint-v4" 

# Send 100% of the traffic to a single entry.
# NOTE(review): key "0" is presumed to denote the model deployed by this call — confirm against the SDK docs.
TRAFFIC_SPLIT = {"0": 100}

# Deploy the uploaded model on n1-standard-4 with a fixed replica count (min == max == 1);
# the returned endpoint object is used by the predict/explain cells below.
endpoint = model.deploy(
    deployed_model_display_name=DEPLOYED_NAME,
    traffic_split=TRAFFIC_SPLIT,
    machine_type="n1-standard-4",
    min_replica_count=1,
    max_replica_count=1,
)

In [None]:
# A single request instance: a flat list of two values.
INSTANCE = [10, 10]
# Requests are sent as a batch, so wrap the single instance in an outer list.
instances = [INSTANCE]

In [None]:
endpoint.predict(instances=instances)

In [None]:
prediction = endpoint.explain(instances=instances)
print(prediction)

In [None]:
help(endpoint.explain)

In [None]:
# Same explain request, with the instance written as a feature-name -> value mapping instead of a list.
# NOTE(review): the other cells send instances as plain lists in [hours_per_week, age] order —
# confirm that the serving container accepts a dict-shaped instance.
endpoint.explain(instances=[{"age":10,"hours_per_week":10}])

In [None]:
%%bash
# Create an endpoint from the gcloud CLI. The %%bash cell magic is required:
# without it the cell is parsed as Python and fails with a SyntaxError.
gcloud ai endpoints create \
  --region=us-central1 \
  --display-name=logistic_regression_xai_v2

In [None]:
# Build the What-If Tool examples: the first num_wit_examples feature rows,
# with the matching labels appended as a final column.
num_wit_examples = 500
test_examples = np.hstack(
    (X[:num_wit_examples], y.values[:num_wit_examples].reshape(-1, 1))
)
test_examples[:3]

In [None]:
num_data = 500  # not referenced elsewhere in this cell
tool_height = 500  # passed as the widget height below
# Configure the What-If Tool with the example rows, their column names (features plus the label),
# the local classifier's predict_proba as the prediction function, and income_bracket as the target.
# NOTE(review): every example row also carries the income_bracket column, while clf was fit on two
# feature columns only — confirm what the custom predict function actually receives.
config_builder = (WitConfigBuilder(test_examples.tolist(), X.columns.tolist() + ['income_bracket'])
  .set_custom_predict_fn(clf.predict_proba)
  .set_target_feature('income_bracket'))
WitWidget(config_builder, height=tool_height)

In [None]:
np.array(X.loc[0].values.flatten().tolist())

In [None]:
%%writefile input.json

{
  "instances" : [
      [4.8, 3]
  ]
}

In [None]:
!gcloud ai endpoints explain 8246258043482800128 \
  --region=us-central1 \
  --json-request=input.json

In [None]:
def explain_tabular_sample(
    project: str, location: str, endpoint_id: str, instance_dict: dict
):
    """Request an explanation for one instance and print the response.

    Args:
        project: Project passed to ``aiplatform.init``.
        location: Location passed to ``aiplatform.init``.
        endpoint_id: Identifier used to construct the ``aiplatform.Endpoint``.
        instance_dict: A single instance; it is wrapped in a one-element list
            before being sent, so callers must not pass a batch.

    Returns:
        None. The attribution summary fields of every explanation are printed,
        followed by every prediction in the response.
    """
    aiplatform.init(project=project, location=location)

    response = aiplatform.Endpoint(endpoint_id).explain(
        instances=[instance_dict], parameters={}
    )

    for explanation in response.explanations:
        print(" explanation")
        # Feature attributions.
        for attribution in explanation.attributions:
            print("  attribution")
            print("   baseline_output_value:", attribution.baseline_output_value)
            print("   instance_output_value:", attribution.instance_output_value)
            print("   output_display_name:", attribution.output_display_name)
            print("   approximation_error:", attribution.approximation_error)
            print("   output_name:", attribution.output_name)
            # output_index is iterable; print each entry on its own line.
            for index_entry in attribution.output_index:
                print("   output_index:", index_entry)

    for predicted in response.predictions:
        print(predicted)

In [None]:
instances = [[77, 10]]

In [None]:
from google.cloud import aiplatform


In [None]:
# explain_tabular_sample wraps its last argument in a list itself, so pass a single
# instance; passing the whole `instances` batch would send a triple-nested list.
explain_tabular_sample("felipe-sandbox", "us-central1", "7246881138671616000", instances[0])

In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
import joblib

class IrisClassifier:
    """Logistic-regression classifier fitted on scikit-learn's iris dataset.

    The dataset is loaded and the model is trained when the object is
    constructed, so an instance is ready to predict immediately.
    """

    # Keys read from the `features` dict, in the order they are fed to the model.
    FEATURE_NAMES = ('sepal_length', 'sepal_width', 'petal_length', 'petal_width')

    def __init__(self):
        self.X, self.y = load_iris(return_X_y=True)
        self.clf = self.train_model()
        # Maps the integer class label to the species name returned by predict().
        self.iris_type = {
            0: 'setosa',
            1: 'versicolor',
            2: 'virginica'
        }

    def train_model(self) -> LogisticRegression:
        """Fit and return a logistic regression on the loaded iris data."""
        # `multi_class` is intentionally not passed: it is deprecated in recent
        # scikit-learn releases, and the lbfgs solver already fits a multinomial
        # model for multiclass targets by default.
        return LogisticRegression(solver='lbfgs',
                                  max_iter=1000).fit(self.X, self.y)

    def predict(self, features: dict):
        """Classify one flower.

        Args:
            features: Mapping with the keys listed in ``FEATURE_NAMES``.

        Returns:
            dict with ``'class'`` (species name) and ``'probability'`` (the
            probability of that class as a plain float rounded to 2 decimals).

        Raises:
            KeyError: If one of the expected feature keys is missing.
        """
        row = [features[name] for name in self.FEATURE_NAMES]
        # predict_proba returns one row per input; a single sample is sent, so take row 0.
        probabilities = self.clf.predict_proba([row])[0]
        best = int(np.argmax(probabilities))
        return {'class': self.iris_type[best],
                'probability': round(float(probabilities[best]), 2)}

# Train an IrisClassifier and serialize the whole wrapper object with joblib.
# NOTE(review): this rebinds `model` (assigned from aip.Model.upload earlier in the notebook)
# and overwrites the model.joblib written earlier for the census classifier — confirm both are intended.
model = IrisClassifier()
joblib.dump(model, "model.joblib")

In [None]:
model.predict(features={"sepal_length": 4.8,"sepal_width": 3,"petal_length": 1.4,"petal_width": 0.3})